[llvm] f68cc2a - AMDGPU: Use 128-bit DS operations by default
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 2 14:42:23 PDT 2020
Author: Matt Arsenault
Date: 2020-04-02T17:17:47-04:00
New Revision: f68cc2a7ed766965028b8b0f0d9300a0460c3cf1
URL: https://github.com/llvm/llvm-project/commit/f68cc2a7ed766965028b8b0f0d9300a0460c3cf1
DIFF: https://github.com/llvm/llvm-project/commit/f68cc2a7ed766965028b8b0f0d9300a0460c3cf1.diff
LOG: AMDGPU: Use 128-bit DS operations by default
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir
llvm/test/CodeGen/AMDGPU/concat_vectors.ll
llvm/test/CodeGen/AMDGPU/ds_read2_superreg.ll
llvm/test/CodeGen/AMDGPU/indirect-private-64.ll
llvm/test/CodeGen/AMDGPU/insert-subvector-unused-scratch.ll
llvm/test/CodeGen/AMDGPU/load-local-f32.ll
llvm/test/CodeGen/AMDGPU/load-local-f64.ll
llvm/test/CodeGen/AMDGPU/load-local-i16.ll
llvm/test/CodeGen/AMDGPU/load-local-i32.ll
llvm/test/CodeGen/AMDGPU/load-local-i64.ll
llvm/test/CodeGen/AMDGPU/load-local-i8.ll
llvm/test/CodeGen/AMDGPU/local-64.ll
llvm/test/CodeGen/AMDGPU/reorder-stores.ll
llvm/test/CodeGen/AMDGPU/split-vector-memoperand-offsets.ll
llvm/test/CodeGen/AMDGPU/store-local.ll
llvm/test/CodeGen/AMDGPU/store-v3i64.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 5d58b8239ca0..df4c6308fee3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -78,7 +78,7 @@ GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
// unset everything else if it is disabled
// Assuming ECC is enabled is the conservative default.
- SmallString<256> FullFS("+promote-alloca,+load-store-opt,+sram-ecc,+xnack,");
+ SmallString<256> FullFS("+promote-alloca,+load-store-opt,+enable-ds128,+sram-ecc,+xnack,");
if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA.
FullFS += "+flat-for-global,+unaligned-buffer-access,+trap-handler,";
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir
index 7e0607dc6ddc..2fb12235ee31 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir
@@ -1,6 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=SI %s
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=bonaire -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=CI %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=bonaire -mattr=-enable-ds128 -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=CI %s
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=bonaire -mattr=+enable-ds128 -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=CI-DS128 %s
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=VI %s
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s
@@ -1759,24 +1759,12 @@ body: |
; CI-DS128: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96)
; VI-LABEL: name: test_load_local_s96_align8
; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
- ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3)
- ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
- ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
- ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4, align 8, addrspace 3)
- ; VI: [[DEF:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF
- ; VI: [[INSERT:%[0-9]+]]:_(s96) = G_INSERT [[DEF]], [[LOAD]](s64), 0
- ; VI: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64
- ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](s96)
+ ; VI: [[LOAD:%[0-9]+]]:_(s96) = G_LOAD [[COPY]](p3) :: (load 12, align 8, addrspace 3)
+ ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96)
; GFX9-LABEL: name: test_load_local_s96_align8
; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
- ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3)
- ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
- ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
- ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4, align 8, addrspace 3)
- ; GFX9: [[DEF:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF
- ; GFX9: [[INSERT:%[0-9]+]]:_(s96) = G_INSERT [[DEF]], [[LOAD]](s64), 0
- ; GFX9: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64
- ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](s96)
+ ; GFX9: [[LOAD:%[0-9]+]]:_(s96) = G_LOAD [[COPY]](p3) :: (load 12, align 8, addrspace 3)
+ ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96)
%0:_(p3) = COPY $vgpr0
%1:_(s96) = G_LOAD %0 :: (load 12, align 8, addrspace 3)
$vgpr0_vgpr1_vgpr2 = COPY %1
@@ -1814,24 +1802,12 @@ body: |
; CI-DS128: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96)
; VI-LABEL: name: test_load_local_s96_align4
; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
- ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3)
- ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
- ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
- ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4, addrspace 3)
- ; VI: [[DEF:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF
- ; VI: [[INSERT:%[0-9]+]]:_(s96) = G_INSERT [[DEF]], [[LOAD]](s64), 0
- ; VI: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64
- ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](s96)
+ ; VI: [[LOAD:%[0-9]+]]:_(s96) = G_LOAD [[COPY]](p3) :: (load 12, align 4, addrspace 3)
+ ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96)
; GFX9-LABEL: name: test_load_local_s96_align4
; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
- ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3)
- ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
- ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
- ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4, addrspace 3)
- ; GFX9: [[DEF:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF
- ; GFX9: [[INSERT:%[0-9]+]]:_(s96) = G_INSERT [[DEF]], [[LOAD]](s64), 0
- ; GFX9: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64
- ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](s96)
+ ; GFX9: [[LOAD:%[0-9]+]]:_(s96) = G_LOAD [[COPY]](p3) :: (load 12, align 4, addrspace 3)
+ ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96)
%0:_(p3) = COPY $vgpr0
%1:_(s96) = G_LOAD %0 :: (load 12, align 4, addrspace 3)
$vgpr0_vgpr1_vgpr2 = COPY %1
@@ -2969,94 +2945,99 @@ body: |
; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
; VI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32)
; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1, addrspace 3)
- ; VI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+ ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+ ; VI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
+ ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1, addrspace 3)
+ ; VI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
+ ; VI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32)
+ ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1, addrspace 3)
+ ; VI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+ ; VI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C9]](s32)
+ ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1, addrspace 3)
+ ; VI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 11
+ ; VI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C10]](s32)
+ ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1, addrspace 3)
+ ; VI: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+ ; VI: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C11]](s32)
+ ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1, addrspace 3)
+ ; VI: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 13
+ ; VI: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C12]](s32)
+ ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load 1, addrspace 3)
+ ; VI: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 14
+ ; VI: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C13]](s32)
+ ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load 1, addrspace 3)
+ ; VI: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
+ ; VI: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C14]](s32)
+ ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1, addrspace 3)
+ ; VI: [[C15:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
- ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+ ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C15]]
; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
- ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
- ; VI: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
- ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
+ ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C15]]
+ ; VI: [[C16:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+ ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C16]](s16)
; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
- ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+ ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C15]]
; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
- ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
- ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
+ ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C15]]
+ ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C16]](s16)
; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
- ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+ ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C15]]
; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
- ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
- ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
+ ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C15]]
+ ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C16]](s16)
; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
- ; VI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+ ; VI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C15]]
; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
- ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
- ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
+ ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C15]]
+ ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C16]](s16)
; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
- ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
- ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
- ; VI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
- ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
- ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
- ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
- ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
- ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
- ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
- ; VI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
- ; VI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C10]](s32)
- ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1, addrspace 3)
- ; VI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
- ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1, addrspace 3)
- ; VI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
- ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1, addrspace 3)
- ; VI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
- ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1, addrspace 3)
- ; VI: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s32)
- ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1, addrspace 3)
- ; VI: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s32)
- ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load 1, addrspace 3)
- ; VI: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s32)
- ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load 1, addrspace 3)
- ; VI: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s32)
- ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1, addrspace 3)
; VI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
- ; VI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]]
+ ; VI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C15]]
; VI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32)
- ; VI: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]]
- ; VI: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16)
- ; VI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]]
+ ; VI: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C15]]
+ ; VI: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C16]](s16)
+ ; VI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]]
; VI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
- ; VI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]]
+ ; VI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C15]]
; VI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32)
- ; VI: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]]
- ; VI: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16)
- ; VI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]]
+ ; VI: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C15]]
+ ; VI: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C16]](s16)
+ ; VI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]]
; VI: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
- ; VI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]]
+ ; VI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C15]]
; VI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32)
- ; VI: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]]
- ; VI: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16)
- ; VI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]]
+ ; VI: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C15]]
+ ; VI: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C16]](s16)
+ ; VI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]]
; VI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32)
- ; VI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]]
+ ; VI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C15]]
; VI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32)
- ; VI: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]]
- ; VI: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16)
- ; VI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]]
- ; VI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
- ; VI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
- ; VI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32)
+ ; VI: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C15]]
+ ; VI: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C16]](s16)
+ ; VI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]]
+ ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+ ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+ ; VI: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C17]](s32)
+ ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL8]]
+ ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+ ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+ ; VI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C17]](s32)
+ ; VI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL9]]
+ ; VI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16)
+ ; VI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16)
+ ; VI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C17]](s32)
; VI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]]
- ; VI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
- ; VI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
- ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32)
+ ; VI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
+ ; VI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
+ ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C17]](s32)
; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
- ; VI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
- ; VI: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64)
- ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV2]](s128)
+ ; VI: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR8]](s32), [[OR9]](s32), [[OR10]](s32), [[OR11]](s32)
+ ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128)
; GFX9-LABEL: name: test_load_local_s128_align16
; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3)
@@ -3081,94 +3062,99 @@ body: |
; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32)
; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1, addrspace 3)
- ; GFX9: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+ ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+ ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
+ ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1, addrspace 3)
+ ; GFX9: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
+ ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32)
+ ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1, addrspace 3)
+ ; GFX9: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+ ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C9]](s32)
+ ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1, addrspace 3)
+ ; GFX9: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 11
+ ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C10]](s32)
+ ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1, addrspace 3)
+ ; GFX9: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+ ; GFX9: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C11]](s32)
+ ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1, addrspace 3)
+ ; GFX9: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 13
+ ; GFX9: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C12]](s32)
+ ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load 1, addrspace 3)
+ ; GFX9: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 14
+ ; GFX9: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C13]](s32)
+ ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load 1, addrspace 3)
+ ; GFX9: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
+ ; GFX9: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C14]](s32)
+ ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1, addrspace 3)
+ ; GFX9: [[C15:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
- ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+ ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C15]]
; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
- ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
- ; GFX9: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
- ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
+ ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C15]]
+ ; GFX9: [[C16:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+ ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C16]](s16)
; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
- ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+ ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C15]]
; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
- ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
- ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
+ ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C15]]
+ ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C16]](s16)
; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
- ; GFX9: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+ ; GFX9: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C15]]
; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
- ; GFX9: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
- ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
+ ; GFX9: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C15]]
+ ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C16]](s16)
; GFX9: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
; GFX9: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
- ; GFX9: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+ ; GFX9: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C15]]
; GFX9: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
- ; GFX9: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
- ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
+ ; GFX9: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C15]]
+ ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C16]](s16)
; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
- ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
- ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
- ; GFX9: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
- ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
- ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
- ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
- ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
- ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
- ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
- ; GFX9: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
- ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C10]](s32)
- ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1, addrspace 3)
- ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
- ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1, addrspace 3)
- ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
- ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1, addrspace 3)
- ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
- ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1, addrspace 3)
- ; GFX9: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s32)
- ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1, addrspace 3)
- ; GFX9: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s32)
- ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load 1, addrspace 3)
- ; GFX9: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s32)
- ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load 1, addrspace 3)
- ; GFX9: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s32)
- ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1, addrspace 3)
; GFX9: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
- ; GFX9: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]]
+ ; GFX9: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C15]]
; GFX9: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32)
- ; GFX9: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]]
- ; GFX9: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16)
- ; GFX9: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]]
+ ; GFX9: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C15]]
+ ; GFX9: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C16]](s16)
+ ; GFX9: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]]
; GFX9: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
- ; GFX9: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]]
+ ; GFX9: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C15]]
; GFX9: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32)
- ; GFX9: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]]
- ; GFX9: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16)
- ; GFX9: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]]
+ ; GFX9: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C15]]
+ ; GFX9: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C16]](s16)
+ ; GFX9: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]]
; GFX9: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
- ; GFX9: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]]
+ ; GFX9: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C15]]
; GFX9: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32)
- ; GFX9: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]]
- ; GFX9: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16)
- ; GFX9: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]]
+ ; GFX9: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C15]]
+ ; GFX9: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C16]](s16)
+ ; GFX9: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]]
; GFX9: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32)
- ; GFX9: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]]
+ ; GFX9: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C15]]
; GFX9: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32)
- ; GFX9: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]]
- ; GFX9: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16)
- ; GFX9: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]]
- ; GFX9: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
- ; GFX9: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
- ; GFX9: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32)
+ ; GFX9: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C15]]
+ ; GFX9: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C16]](s16)
+ ; GFX9: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]]
+ ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+ ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+ ; GFX9: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C17]](s32)
+ ; GFX9: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL8]]
+ ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+ ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+ ; GFX9: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C17]](s32)
+ ; GFX9: [[OR9:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL9]]
+ ; GFX9: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16)
+ ; GFX9: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16)
+ ; GFX9: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C17]](s32)
; GFX9: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]]
- ; GFX9: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
- ; GFX9: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
- ; GFX9: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32)
+ ; GFX9: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
+ ; GFX9: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
+ ; GFX9: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C17]](s32)
; GFX9: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
- ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
- ; GFX9: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64)
- ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV2]](s128)
+ ; GFX9: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR8]](s32), [[OR9]](s32), [[OR10]](s32), [[OR11]](s32)
+ ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128)
%0:_(p3) = COPY $vgpr0
%1:_(s128) = G_LOAD %0 :: (load 16, align 1, addrspace 3)
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -3202,20 +3188,12 @@ body: |
; CI-DS128: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
; VI-LABEL: name: test_load_local_s128_align8
; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
- ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3)
- ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
- ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
- ; VI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load 8, addrspace 3)
- ; VI: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[LOAD]](s64), [[LOAD1]](s64)
- ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128)
+ ; VI: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p3) :: (load 16, align 8, addrspace 3)
+ ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
; GFX9-LABEL: name: test_load_local_s128_align8
; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
- ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3)
- ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
- ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
- ; GFX9: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load 8, addrspace 3)
- ; GFX9: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[LOAD]](s64), [[LOAD1]](s64)
- ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128)
+ ; GFX9: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p3) :: (load 16, align 8, addrspace 3)
+ ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
%0:_(p3) = COPY $vgpr0
%1:_(s128) = G_LOAD %0 :: (load 16, align 8, addrspace 3)
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -3249,20 +3227,12 @@ body: |
; CI-DS128: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
; VI-LABEL: name: test_load_local_s128_align4
; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
- ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3)
- ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
- ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
- ; VI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load 8, align 4, addrspace 3)
- ; VI: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[LOAD]](s64), [[LOAD1]](s64)
- ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128)
+ ; VI: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
+ ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
; GFX9-LABEL: name: test_load_local_s128_align4
; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
- ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3)
- ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
- ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
- ; GFX9: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load 8, align 4, addrspace 3)
- ; GFX9: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[LOAD]](s64), [[LOAD1]](s64)
- ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128)
+ ; GFX9: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
+ ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
%0:_(p3) = COPY $vgpr0
%1:_(s128) = G_LOAD %0 :: (load 16, align 4, addrspace 3)
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -3440,45 +3410,46 @@ body: |
; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
; VI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 2, addrspace 3)
- ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+ ; VI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
+ ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 2, addrspace 3)
+ ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+ ; VI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+ ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 2, addrspace 3)
+ ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+ ; VI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
+ ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 2, addrspace 3)
+ ; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 14
+ ; VI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32)
+ ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 2, addrspace 3)
+ ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
- ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
+ ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C7]]
; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
- ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
- ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+ ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C7]]
+ ; VI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C8]](s32)
; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
- ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+ ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C7]]
; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
- ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
- ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+ ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C7]]
+ ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C8]](s32)
; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
- ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
- ; VI: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
- ; VI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 2, addrspace 3)
- ; VI: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
- ; VI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 2, addrspace 3)
- ; VI: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
- ; VI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 2, addrspace 3)
- ; VI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
- ; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 2, addrspace 3)
; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
- ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
+ ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C7]]
; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32)
- ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
- ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+ ; VI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C7]]
+ ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C8]](s32)
; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
- ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
+ ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C7]]
; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32)
- ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
- ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32)
+ ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C7]]
+ ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
- ; VI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR3]](s32)
- ; VI: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64)
- ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV2]](s128)
+ ; VI: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32)
+ ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128)
; GFX9-LABEL: name: test_load_local_s128_align2
; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, addrspace 3)
@@ -3491,45 +3462,46 @@ body: |
; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 6
; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 2, addrspace 3)
- ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+ ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C3]](s32)
+ ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 2, addrspace 3)
+ ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+ ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C4]](s32)
+ ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 2, addrspace 3)
+ ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+ ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
+ ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 2, addrspace 3)
+ ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 14
+ ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32)
+ ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 2, addrspace 3)
+ ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
- ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
+ ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C7]]
; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
- ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
- ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32)
+ ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C7]]
+ ; GFX9: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C8]](s32)
; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
- ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
+ ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C7]]
; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
- ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]]
- ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32)
+ ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C7]]
+ ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C8]](s32)
; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]]
- ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32)
- ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
- ; GFX9: [[PTR_ADD3:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C5]](s32)
- ; GFX9: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p3) :: (load 2, addrspace 3)
- ; GFX9: [[PTR_ADD4:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32)
- ; GFX9: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p3) :: (load 2, addrspace 3)
- ; GFX9: [[PTR_ADD5:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s32)
- ; GFX9: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD5]](p3) :: (load 2, addrspace 3)
- ; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD3]], [[C2]](s32)
- ; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 2, addrspace 3)
; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
- ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C3]]
+ ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C7]]
; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32)
- ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C3]]
- ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32)
+ ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C7]]
+ ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C8]](s32)
; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]]
; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
- ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]]
+ ; GFX9: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C7]]
; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32)
- ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C3]]
- ; GFX9: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C4]](s32)
+ ; GFX9: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C7]]
+ ; GFX9: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32)
; GFX9: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]]
- ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR3]](s32)
- ; GFX9: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64)
- ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV2]](s128)
+ ; GFX9: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32)
+ ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128)
%0:_(p3) = COPY $vgpr0
%1:_(s128) = G_LOAD %0 :: (load 16, align 2, addrspace 3)
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -3954,94 +3926,99 @@ body: |
; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
; VI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32)
; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1, addrspace 3)
- ; VI: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+ ; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+ ; VI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
+ ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1, addrspace 3)
+ ; VI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
+ ; VI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32)
+ ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1, addrspace 3)
+ ; VI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+ ; VI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C9]](s32)
+ ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1, addrspace 3)
+ ; VI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 11
+ ; VI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C10]](s32)
+ ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1, addrspace 3)
+ ; VI: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+ ; VI: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C11]](s32)
+ ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1, addrspace 3)
+ ; VI: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 13
+ ; VI: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C12]](s32)
+ ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load 1, addrspace 3)
+ ; VI: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 14
+ ; VI: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C13]](s32)
+ ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load 1, addrspace 3)
+ ; VI: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
+ ; VI: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C14]](s32)
+ ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1, addrspace 3)
+ ; VI: [[C15:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
- ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+ ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C15]]
; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
- ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
- ; VI: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
- ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
+ ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C15]]
+ ; VI: [[C16:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+ ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C16]](s16)
; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
- ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+ ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C15]]
; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
- ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
- ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
+ ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C15]]
+ ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C16]](s16)
; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
- ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+ ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C15]]
; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
- ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
- ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
+ ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C15]]
+ ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C16]](s16)
; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
- ; VI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+ ; VI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C15]]
; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
- ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
- ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
- ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
- ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
- ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
- ; VI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
- ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
- ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
- ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
- ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
- ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
- ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
- ; VI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
- ; VI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C10]](s32)
- ; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1, addrspace 3)
- ; VI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
- ; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1, addrspace 3)
- ; VI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
- ; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1, addrspace 3)
- ; VI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
- ; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1, addrspace 3)
- ; VI: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s32)
- ; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1, addrspace 3)
- ; VI: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s32)
- ; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load 1, addrspace 3)
- ; VI: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s32)
- ; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load 1, addrspace 3)
- ; VI: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s32)
- ; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1, addrspace 3)
+ ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C15]]
+ ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C16]](s16)
+ ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
; VI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
- ; VI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]]
+ ; VI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C15]]
; VI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32)
- ; VI: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]]
- ; VI: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16)
- ; VI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]]
+ ; VI: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C15]]
+ ; VI: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C16]](s16)
+ ; VI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]]
; VI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
- ; VI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]]
+ ; VI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C15]]
; VI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32)
- ; VI: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]]
- ; VI: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16)
- ; VI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]]
+ ; VI: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C15]]
+ ; VI: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C16]](s16)
+ ; VI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]]
; VI: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
- ; VI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]]
+ ; VI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C15]]
; VI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32)
- ; VI: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]]
- ; VI: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16)
- ; VI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]]
+ ; VI: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C15]]
+ ; VI: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C16]](s16)
+ ; VI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]]
; VI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32)
- ; VI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]]
+ ; VI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C15]]
; VI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32)
- ; VI: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]]
- ; VI: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16)
- ; VI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]]
- ; VI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
- ; VI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
- ; VI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32)
+ ; VI: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C15]]
+ ; VI: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C16]](s16)
+ ; VI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]]
+ ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+ ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+ ; VI: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C17]](s32)
+ ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL8]]
+ ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+ ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+ ; VI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C17]](s32)
+ ; VI: [[OR9:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL9]]
+ ; VI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16)
+ ; VI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16)
+ ; VI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C17]](s32)
; VI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]]
- ; VI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
- ; VI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
- ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32)
+ ; VI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
+ ; VI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
+ ; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C17]](s32)
; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
- ; VI: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
- ; VI: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64)
- ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV2]](s128)
+ ; VI: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR8]](s32), [[OR9]](s32), [[OR10]](s32), [[OR11]](s32)
+ ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128)
; GFX9-LABEL: name: test_load_local_s128_align1
; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3)
@@ -4066,94 +4043,99 @@ body: |
; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32)
; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1, addrspace 3)
- ; GFX9: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
+ ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+ ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
+ ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1, addrspace 3)
+ ; GFX9: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
+ ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32)
+ ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1, addrspace 3)
+ ; GFX9: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+ ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C9]](s32)
+ ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1, addrspace 3)
+ ; GFX9: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 11
+ ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C10]](s32)
+ ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1, addrspace 3)
+ ; GFX9: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+ ; GFX9: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C11]](s32)
+ ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1, addrspace 3)
+ ; GFX9: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 13
+ ; GFX9: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C12]](s32)
+ ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load 1, addrspace 3)
+ ; GFX9: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 14
+ ; GFX9: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C13]](s32)
+ ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load 1, addrspace 3)
+ ; GFX9: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
+ ; GFX9: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C14]](s32)
+ ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1, addrspace 3)
+ ; GFX9: [[C15:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
- ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C7]]
+ ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C15]]
; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD1]](s32)
- ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C7]]
- ; GFX9: [[C8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
- ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C8]](s16)
+ ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C15]]
+ ; GFX9: [[C16:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
+ ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C16]](s16)
; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]]
; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
- ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C7]]
+ ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C15]]
; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD3]](s32)
- ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C7]]
- ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C8]](s16)
+ ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C15]]
+ ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C16]](s16)
; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]]
; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD4]](s32)
- ; GFX9: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C7]]
+ ; GFX9: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C15]]
; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD5]](s32)
- ; GFX9: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C7]]
- ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C8]](s16)
+ ; GFX9: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C15]]
+ ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C16]](s16)
; GFX9: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]]
; GFX9: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD6]](s32)
- ; GFX9: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C7]]
+ ; GFX9: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C15]]
; GFX9: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD7]](s32)
- ; GFX9: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C7]]
- ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C8]](s16)
+ ; GFX9: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C15]]
+ ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C16]](s16)
; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]]
- ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
- ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
- ; GFX9: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C9]](s32)
- ; GFX9: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]]
- ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
- ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
- ; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C9]](s32)
- ; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
- ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32)
- ; GFX9: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
- ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C10]](s32)
- ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1, addrspace 3)
- ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
- ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1, addrspace 3)
- ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
- ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1, addrspace 3)
- ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
- ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1, addrspace 3)
- ; GFX9: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s32)
- ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1, addrspace 3)
- ; GFX9: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s32)
- ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load 1, addrspace 3)
- ; GFX9: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s32)
- ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load 1, addrspace 3)
- ; GFX9: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s32)
- ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1, addrspace 3)
; GFX9: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD8]](s32)
- ; GFX9: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C7]]
+ ; GFX9: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C15]]
; GFX9: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD9]](s32)
- ; GFX9: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C7]]
- ; GFX9: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C8]](s16)
- ; GFX9: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]]
+ ; GFX9: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C15]]
+ ; GFX9: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C16]](s16)
+ ; GFX9: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]]
; GFX9: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD10]](s32)
- ; GFX9: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C7]]
+ ; GFX9: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C15]]
; GFX9: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD11]](s32)
- ; GFX9: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C7]]
- ; GFX9: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C8]](s16)
- ; GFX9: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]]
+ ; GFX9: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C15]]
+ ; GFX9: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C16]](s16)
+ ; GFX9: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]]
; GFX9: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD12]](s32)
- ; GFX9: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C7]]
+ ; GFX9: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C15]]
; GFX9: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD13]](s32)
- ; GFX9: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C7]]
- ; GFX9: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C8]](s16)
- ; GFX9: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]]
+ ; GFX9: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C15]]
+ ; GFX9: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C16]](s16)
+ ; GFX9: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]]
; GFX9: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD14]](s32)
- ; GFX9: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C7]]
+ ; GFX9: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C15]]
; GFX9: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD15]](s32)
- ; GFX9: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C7]]
- ; GFX9: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C8]](s16)
- ; GFX9: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]]
- ; GFX9: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
- ; GFX9: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
- ; GFX9: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C9]](s32)
+ ; GFX9: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C15]]
+ ; GFX9: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C16]](s16)
+ ; GFX9: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]]
+ ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16)
+ ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16)
+ ; GFX9: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; GFX9: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C17]](s32)
+ ; GFX9: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL8]]
+ ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16)
+ ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16)
+ ; GFX9: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C17]](s32)
+ ; GFX9: [[OR9:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL9]]
+ ; GFX9: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16)
+ ; GFX9: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16)
+ ; GFX9: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C17]](s32)
; GFX9: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]]
- ; GFX9: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16)
- ; GFX9: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16)
- ; GFX9: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C9]](s32)
+ ; GFX9: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16)
+ ; GFX9: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
+ ; GFX9: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C17]](s32)
; GFX9: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
- ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32)
- ; GFX9: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64)
- ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV2]](s128)
+ ; GFX9: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR8]](s32), [[OR9]](s32), [[OR10]](s32), [[OR11]](s32)
+ ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128)
%0:_(p3) = COPY $vgpr0
%1:_(s128) = G_LOAD %0 :: (load 16, align 1, addrspace 3)
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -5846,33 +5828,38 @@ body: |
; VI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
; VI: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32)
; VI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1, addrspace 3)
- ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
- ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
- ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
- ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
- ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
- ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32)
- ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
- ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32)
- ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32)
- ; VI: [[TRUNC:%[0-9]+]]:_(<8 x s8>) = G_TRUNC [[BUILD_VECTOR]](<8 x s32>)
; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
; VI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1, addrspace 3)
- ; VI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
+ ; VI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
+ ; VI: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32)
; VI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1, addrspace 3)
- ; VI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
+ ; VI: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+ ; VI: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C9]](s32)
; VI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1, addrspace 3)
- ; VI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
+ ; VI: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 11
+ ; VI: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C10]](s32)
; VI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1, addrspace 3)
- ; VI: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s32)
+ ; VI: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+ ; VI: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C11]](s32)
; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1, addrspace 3)
- ; VI: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s32)
+ ; VI: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 13
+ ; VI: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C12]](s32)
; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load 1, addrspace 3)
- ; VI: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s32)
+ ; VI: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 14
+ ; VI: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C13]](s32)
; VI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load 1, addrspace 3)
- ; VI: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s32)
+ ; VI: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
+ ; VI: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C14]](s32)
; VI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1, addrspace 3)
+ ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
+ ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
+ ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
+ ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
+ ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LOAD4]](s32)
+ ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LOAD5]](s32)
+ ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
+ ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32)
; VI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32)
; VI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32)
; VI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LOAD10]](s32)
@@ -5881,10 +5868,9 @@ body: |
; VI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[LOAD13]](s32)
; VI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD14]](s32)
; VI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32)
- ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32)
- ; VI: [[TRUNC1:%[0-9]+]]:_(<8 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<8 x s32>)
- ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<8 x s8>), [[TRUNC1]](<8 x s8>)
- ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<16 x s8>)
+ ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32)
+ ; VI: [[TRUNC:%[0-9]+]]:_(<16 x s8>) = G_TRUNC [[BUILD_VECTOR]](<16 x s32>)
+ ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[TRUNC]](<16 x s8>)
; GFX9-LABEL: name: test_load_local_v16s8_align16
; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3)
@@ -5909,6 +5895,30 @@ body: |
; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
; GFX9: [[PTR_ADD6:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C6]](s32)
; GFX9: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD6]](p3) :: (load 1, addrspace 3)
+ ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+ ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
+ ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1, addrspace 3)
+ ; GFX9: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 9
+ ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32)
+ ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1, addrspace 3)
+ ; GFX9: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 10
+ ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C9]](s32)
+ ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1, addrspace 3)
+ ; GFX9: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 11
+ ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C10]](s32)
+ ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1, addrspace 3)
+ ; GFX9: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+ ; GFX9: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C11]](s32)
+ ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1, addrspace 3)
+ ; GFX9: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 13
+ ; GFX9: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C12]](s32)
+ ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load 1, addrspace 3)
+ ; GFX9: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 14
+ ; GFX9: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C13]](s32)
+ ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load 1, addrspace 3)
+ ; GFX9: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
+ ; GFX9: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C14]](s32)
+ ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1, addrspace 3)
; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[COPY2]](s32)
@@ -5921,25 +5931,6 @@ body: |
; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LOAD6]](s32)
; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LOAD7]](s32)
; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32)
- ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>)
- ; GFX9: [[TRUNC:%[0-9]+]]:_(<8 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<8 x s16>)
- ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
- ; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
- ; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1, addrspace 3)
- ; GFX9: [[PTR_ADD8:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C]](s32)
- ; GFX9: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD8]](p3) :: (load 1, addrspace 3)
- ; GFX9: [[PTR_ADD9:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s32)
- ; GFX9: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD9]](p3) :: (load 1, addrspace 3)
- ; GFX9: [[PTR_ADD10:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C2]](s32)
- ; GFX9: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD10]](p3) :: (load 1, addrspace 3)
- ; GFX9: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C3]](s32)
- ; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1, addrspace 3)
- ; GFX9: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C4]](s32)
- ; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load 1, addrspace 3)
- ; GFX9: [[PTR_ADD13:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C5]](s32)
- ; GFX9: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD13]](p3) :: (load 1, addrspace 3)
- ; GFX9: [[PTR_ADD14:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s32)
- ; GFX9: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD14]](p3) :: (load 1, addrspace 3)
; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LOAD8]](s32)
; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LOAD9]](s32)
; GFX9: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[COPY10]](s32)
@@ -5952,10 +5943,9 @@ body: |
; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[LOAD14]](s32)
; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[LOAD15]](s32)
; GFX9: [[BUILD_VECTOR_TRUNC7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32)
- ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>), [[BUILD_VECTOR_TRUNC6]](<2 x s16>), [[BUILD_VECTOR_TRUNC7]](<2 x s16>)
- ; GFX9: [[TRUNC1:%[0-9]+]]:_(<8 x s8>) = G_TRUNC [[CONCAT_VECTORS1]](<8 x s16>)
- ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS [[TRUNC]](<8 x s8>), [[TRUNC1]](<8 x s8>)
- ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS2]](<16 x s8>)
+ ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>), [[BUILD_VECTOR_TRUNC6]](<2 x s16>), [[BUILD_VECTOR_TRUNC7]](<2 x s16>)
+ ; GFX9: [[TRUNC:%[0-9]+]]:_(<16 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<16 x s16>)
+ ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[TRUNC]](<16 x s8>)
%0:_(p3) = COPY $vgpr0
%1:_(<16 x s8>) = G_LOAD %0 :: (load 16, align 1, addrspace 3)
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -8187,24 +8177,12 @@ body: |
; CI-DS128: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
; VI-LABEL: name: test_load_local_v3s32_align4
; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
- ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3)
- ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
- ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
- ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4, addrspace 3)
- ; VI: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
- ; VI: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[LOAD]](<2 x s32>), 0
- ; VI: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64
- ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](<3 x s32>)
+ ; VI: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load 12, align 4, addrspace 3)
+ ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
; GFX9-LABEL: name: test_load_local_v3s32_align4
; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
- ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3)
- ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
- ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
- ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4, addrspace 3)
- ; GFX9: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
- ; GFX9: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[DEF]], [[LOAD]](<2 x s32>), 0
- ; GFX9: [[INSERT1:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64
- ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT1]](<3 x s32>)
+ ; GFX9: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load 12, align 4, addrspace 3)
+ ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>)
%0:_(p3) = COPY $vgpr0
%1:_(<3 x s32>) = G_LOAD %0 :: (load 12, align 4, addrspace 3)
$vgpr0_vgpr1_vgpr2 = COPY %1
@@ -8238,20 +8216,12 @@ body: |
; CI-DS128: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
; VI-LABEL: name: test_load_local_v4s32_align16
; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
- ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 16, addrspace 3)
- ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
- ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
- ; VI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load 8, addrspace 3)
- ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>)
- ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>)
+ ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, addrspace 3)
+ ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
; GFX9-LABEL: name: test_load_local_v4s32_align16
; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
- ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 16, addrspace 3)
- ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
- ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
- ; GFX9: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load 8, addrspace 3)
- ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>)
- ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>)
+ ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, addrspace 3)
+ ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
%0:_(p3) = COPY $vgpr0
%1:_(<4 x s32>) = G_LOAD %0 :: (load 16, align 16, addrspace 3)
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -8285,20 +8255,12 @@ body: |
; CI-DS128: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
; VI-LABEL: name: test_load_local_v4s32_align8
; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
- ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3)
- ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
- ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
- ; VI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load 8, addrspace 3)
- ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>)
- ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>)
+ ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 8, addrspace 3)
+ ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
; GFX9-LABEL: name: test_load_local_v4s32_align8
; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
- ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3)
- ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
- ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
- ; GFX9: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load 8, addrspace 3)
- ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>)
- ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>)
+ ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 8, addrspace 3)
+ ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
%0:_(p3) = COPY $vgpr0
%1:_(<4 x s32>) = G_LOAD %0 :: (load 16, align 8, addrspace 3)
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -8332,20 +8294,12 @@ body: |
; CI-DS128: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
; VI-LABEL: name: test_load_local_v4s32_align4
; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
- ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3)
- ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
- ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
- ; VI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load 8, align 4, addrspace 3)
- ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>)
- ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>)
+ ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
+ ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
; GFX9-LABEL: name: test_load_local_v4s32_align4
; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
- ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3)
- ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
- ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
- ; GFX9: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load 8, align 4, addrspace 3)
- ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>)
- ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>)
+ ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
+ ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>)
%0:_(p3) = COPY $vgpr0
%1:_(<4 x s32>) = G_LOAD %0 :: (load 16, align 4, addrspace 3)
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -9048,7 +9002,6 @@ body: |
; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16)
; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32)
; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
- ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
; VI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
; VI: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
; VI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1, addrspace 3)
@@ -9074,7 +9027,8 @@ body: |
; VI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
; VI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C5]](s32)
; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]]
- ; VI: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s32)
+ ; VI: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+ ; VI: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32)
; VI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1, addrspace 3)
; VI: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
; VI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load 1, addrspace 3)
@@ -9098,9 +9052,8 @@ body: |
; VI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR10]](s16)
; VI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C5]](s32)
; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
- ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR8]](s32), [[OR11]](s32)
- ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>)
- ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>)
+ ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+ ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
; GFX9-LABEL: name: test_load_local_v4s32_align1
; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 1, addrspace 3)
@@ -9157,7 +9110,6 @@ body: |
; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16)
; GFX9: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32)
; GFX9: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]]
- ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32)
; GFX9: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
; GFX9: [[PTR_ADD7:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C7]](s32)
; GFX9: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD7]](p3) :: (load 1, addrspace 3)
@@ -9183,7 +9135,8 @@ body: |
; GFX9: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16)
; GFX9: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C5]](s32)
; GFX9: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL8]]
- ; GFX9: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD7]], [[C6]](s32)
+ ; GFX9: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+ ; GFX9: [[PTR_ADD11:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C8]](s32)
; GFX9: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD11]](p3) :: (load 1, addrspace 3)
; GFX9: [[PTR_ADD12:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD11]], [[C]](s32)
; GFX9: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD12]](p3) :: (load 1, addrspace 3)
@@ -9207,9 +9160,8 @@ body: |
; GFX9: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR10]](s16)
; GFX9: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C5]](s32)
; GFX9: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]]
- ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR8]](s32), [[OR11]](s32)
- ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s32>), [[BUILD_VECTOR1]](<2 x s32>)
- ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>)
+ ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32)
+ ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
%0:_(p3) = COPY $vgpr0
%1:_(<4 x s32>) = G_LOAD %0 :: (load 16, align 1, addrspace 3)
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -9259,31 +9211,19 @@ body: |
; CI-DS128: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>)
; VI-LABEL: name: test_load_local_v8s32_align32
; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
- ; VI: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 32, addrspace 3)
- ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+ ; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 32, addrspace 3)
+ ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
- ; VI: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load 8, addrspace 3)
- ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
- ; VI: [[LOAD2:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load 8, align 16, addrspace 3)
- ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
- ; VI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
- ; VI: [[LOAD3:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD2]](p3) :: (load 8, addrspace 3)
- ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>), [[LOAD2]](<2 x s32>), [[LOAD3]](<2 x s32>)
+ ; VI: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load 16, addrspace 3)
+ ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>)
; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>)
; GFX9-LABEL: name: test_load_local_v8s32_align32
; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
- ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 32, addrspace 3)
- ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+ ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 32, addrspace 3)
+ ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
- ; GFX9: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load 8, addrspace 3)
- ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
- ; GFX9: [[LOAD2:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p3) :: (load 8, align 16, addrspace 3)
- ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
- ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
- ; GFX9: [[LOAD3:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD2]](p3) :: (load 8, addrspace 3)
- ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<2 x s32>), [[LOAD1]](<2 x s32>), [[LOAD2]](<2 x s32>), [[LOAD3]](<2 x s32>)
+ ; GFX9: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p3) :: (load 16, addrspace 3)
+ ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>)
; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<8 x s32>)
%0:_(p3) = COPY $vgpr0
%1:_(<8 x s32>) = G_LOAD %0 :: (load 32, align 32, addrspace 3)
@@ -9349,20 +9289,12 @@ body: |
; CI-DS128: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
; VI-LABEL: name: test_load_local_v2s64_align4
; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
- ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3)
- ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
- ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
- ; VI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load 8, align 4, addrspace 3)
- ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64)
- ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+ ; VI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
+ ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
; GFX9-LABEL: name: test_load_local_v2s64_align4
; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
- ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3)
- ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
- ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
- ; GFX9: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load 8, align 4, addrspace 3)
- ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64)
- ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
+ ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
+ ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>)
%0:_(p3) = COPY $vgpr0
%1:_(<2 x s64>) = G_LOAD %0 :: (load 16, align 4, addrspace 3)
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -10033,30 +9965,28 @@ body: |
; CI-DS128: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT2]](<4 x s64>)
; VI-LABEL: name: test_load_local_v3s64_align32
; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
- ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, align 32, addrspace 3)
- ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+ ; VI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load 16, align 32, addrspace 3)
+ ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
- ; VI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load 8, addrspace 3)
- ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
- ; VI: [[LOAD2:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p3) :: (load 8, align 16, addrspace 3)
- ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64), [[LOAD2]](s64)
- ; VI: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; VI: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
- ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+ ; VI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load 8, align 16, addrspace 3)
+ ; VI: [[DEF:%[0-9]+]]:_(<3 x s64>) = G_IMPLICIT_DEF
+ ; VI: [[INSERT:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[DEF]], [[LOAD]](<2 x s64>), 0
+ ; VI: [[INSERT1:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[INSERT]], [[LOAD1]](s64), 128
+ ; VI: [[DEF1:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+ ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF1]], [[INSERT1]](<3 x s64>), 0
+ ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT2]](<4 x s64>)
; GFX9-LABEL: name: test_load_local_v3s64_align32
; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
- ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, align 32, addrspace 3)
- ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+ ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load 16, align 32, addrspace 3)
+ ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
- ; GFX9: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load 8, addrspace 3)
- ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
- ; GFX9: [[LOAD2:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p3) :: (load 8, align 16, addrspace 3)
- ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64), [[LOAD2]](s64)
- ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
- ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
- ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
+ ; GFX9: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load 8, align 16, addrspace 3)
+ ; GFX9: [[DEF:%[0-9]+]]:_(<3 x s64>) = G_IMPLICIT_DEF
+ ; GFX9: [[INSERT:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[DEF]], [[LOAD]](<2 x s64>), 0
+ ; GFX9: [[INSERT1:%[0-9]+]]:_(<3 x s64>) = G_INSERT [[INSERT]], [[LOAD1]](s64), 128
+ ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
+ ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF1]], [[INSERT1]](<3 x s64>), 0
+ ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT2]](<4 x s64>)
%0:_(p3) = COPY $vgpr0
%1:_(<3 x s64>) = G_LOAD %0 :: (load 24, align 32, addrspace 3)
%2:_(<4 x s64>) = G_IMPLICIT_DEF
@@ -10108,32 +10038,20 @@ body: |
; CI-DS128: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>)
; VI-LABEL: name: test_load_local_v4s64_align32
; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
- ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, align 32, addrspace 3)
- ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+ ; VI: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load 16, align 32, addrspace 3)
+ ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
- ; VI: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load 8, addrspace 3)
- ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
- ; VI: [[LOAD2:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p3) :: (load 8, align 16, addrspace 3)
- ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
- ; VI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
- ; VI: [[LOAD3:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD2]](p3) :: (load 8, addrspace 3)
- ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64), [[LOAD2]](s64), [[LOAD3]](s64)
- ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
+ ; VI: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p3) :: (load 16, addrspace 3)
+ ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>)
+ ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>)
; GFX9-LABEL: name: test_load_local_v4s64_align32
; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
- ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, align 32, addrspace 3)
- ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+ ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load 16, align 32, addrspace 3)
+ ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
- ; GFX9: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load 8, addrspace 3)
- ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
- ; GFX9: [[LOAD2:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p3) :: (load 8, align 16, addrspace 3)
- ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
- ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C2]](s32)
- ; GFX9: [[LOAD3:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD2]](p3) :: (load 8, addrspace 3)
- ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64), [[LOAD2]](s64), [[LOAD3]](s64)
- ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
+ ; GFX9: [[LOAD1:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[PTR_ADD]](p3) :: (load 16, addrspace 3)
+ ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>)
+ ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[CONCAT_VECTORS]](<4 x s64>)
%0:_(p3) = COPY $vgpr0
%1:_(<4 x s64>) = G_LOAD %0 :: (load 32, align 32, addrspace 3)
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1
@@ -10167,20 +10085,12 @@ body: |
; CI-DS128: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>)
; VI-LABEL: name: test_load_local_v2p1_align4
; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
- ; VI: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3)
- ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
- ; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
- ; VI: [[LOAD1:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p3) :: (load 8, align 4, addrspace 3)
- ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[LOAD]](p1), [[LOAD1]](p1)
- ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x p1>)
+ ; VI: [[LOAD:%[0-9]+]]:_(<2 x p1>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
+ ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>)
; GFX9-LABEL: name: test_load_local_v2p1_align4
; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
- ; GFX9: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3)
- ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
- ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
- ; GFX9: [[LOAD1:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p3) :: (load 8, align 4, addrspace 3)
- ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[LOAD]](p1), [[LOAD1]](p1)
- ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x p1>)
+ ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x p1>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3)
+ ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>)
%0:_(p3) = COPY $vgpr0
%1:_(<2 x p1>) = G_LOAD %0 :: (load 16, align 4, addrspace 3)
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -12026,46 +11936,24 @@ body: |
; CI-DS128: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
; VI-LABEL: name: test_extload_local_v2s96_from_24_align4
; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
- ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3)
- ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+ ; VI: [[LOAD:%[0-9]+]]:_(s96) = G_LOAD [[COPY]](p3) :: (load 12, align 4, addrspace 3)
+ ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
- ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4, addrspace 3)
- ; VI: [[DEF:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF
- ; VI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[DEF]](s96)
- ; VI: [[INSERT:%[0-9]+]]:_(s96) = G_INSERT [[COPY1]], [[LOAD]](s64), 0
- ; VI: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64
- ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
- ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
- ; VI: [[LOAD2:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p3) :: (load 8, align 4, addrspace 3)
- ; VI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
- ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 4, addrspace 3)
- ; VI: [[INSERT2:%[0-9]+]]:_(s96) = G_INSERT [[DEF]], [[LOAD2]](s64), 0
- ; VI: [[INSERT3:%[0-9]+]]:_(s96) = G_INSERT [[INSERT2]], [[LOAD3]](s32), 64
- ; VI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[INSERT1]](s96)
- ; VI: [[COPY3:%[0-9]+]]:_(s96) = COPY [[INSERT3]](s96)
- ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY2]](s96)
- ; VI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY3]](s96)
+ ; VI: [[LOAD1:%[0-9]+]]:_(s96) = G_LOAD [[PTR_ADD]](p3) :: (load 12, align 4, addrspace 3)
+ ; VI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[LOAD]](s96)
+ ; VI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[LOAD1]](s96)
+ ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+ ; VI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
; GFX9-LABEL: name: test_extload_local_v2s96_from_24_align4
; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
- ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3)
- ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+ ; GFX9: [[LOAD:%[0-9]+]]:_(s96) = G_LOAD [[COPY]](p3) :: (load 12, align 4, addrspace 3)
+ ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
- ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4, addrspace 3)
- ; GFX9: [[DEF:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF
- ; GFX9: [[COPY1:%[0-9]+]]:_(s96) = COPY [[DEF]](s96)
- ; GFX9: [[INSERT:%[0-9]+]]:_(s96) = G_INSERT [[COPY1]], [[LOAD]](s64), 0
- ; GFX9: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64
- ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
- ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
- ; GFX9: [[LOAD2:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p3) :: (load 8, align 4, addrspace 3)
- ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
- ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 4, addrspace 3)
- ; GFX9: [[INSERT2:%[0-9]+]]:_(s96) = G_INSERT [[DEF]], [[LOAD2]](s64), 0
- ; GFX9: [[INSERT3:%[0-9]+]]:_(s96) = G_INSERT [[INSERT2]], [[LOAD3]](s32), 64
- ; GFX9: [[COPY2:%[0-9]+]]:_(s96) = COPY [[INSERT1]](s96)
- ; GFX9: [[COPY3:%[0-9]+]]:_(s96) = COPY [[INSERT3]](s96)
- ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[COPY2]](s96)
- ; GFX9: $vgpr3_vgpr4_vgpr5 = COPY [[COPY3]](s96)
+ ; GFX9: [[LOAD1:%[0-9]+]]:_(s96) = G_LOAD [[PTR_ADD]](p3) :: (load 12, align 4, addrspace 3)
+ ; GFX9: [[COPY1:%[0-9]+]]:_(s96) = COPY [[LOAD]](s96)
+ ; GFX9: [[COPY2:%[0-9]+]]:_(s96) = COPY [[LOAD1]](s96)
+ ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+ ; GFX9: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
%0:_(p3) = COPY $vgpr0
%1:_(<2 x s96>) = G_LOAD %0 :: (load 24, align 4, addrspace 3)
%2:_(s96) = G_EXTRACT %1, 0
@@ -12134,46 +12022,24 @@ body: |
; CI-DS128: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
; VI-LABEL: name: test_extload_local_v2s96_from_24_align16
; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
- ; VI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, align 16, addrspace 3)
- ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+ ; VI: [[LOAD:%[0-9]+]]:_(s96) = G_LOAD [[COPY]](p3) :: (load 12, align 16, addrspace 3)
+ ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
; VI: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
- ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4, align 8, addrspace 3)
- ; VI: [[DEF:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF
- ; VI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[DEF]](s96)
- ; VI: [[INSERT:%[0-9]+]]:_(s96) = G_INSERT [[COPY1]], [[LOAD]](s64), 0
- ; VI: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64
- ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
- ; VI: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
- ; VI: [[LOAD2:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p3) :: (load 8, align 4, addrspace 3)
- ; VI: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
- ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 4, addrspace 3)
- ; VI: [[INSERT2:%[0-9]+]]:_(s96) = G_INSERT [[DEF]], [[LOAD2]](s64), 0
- ; VI: [[INSERT3:%[0-9]+]]:_(s96) = G_INSERT [[INSERT2]], [[LOAD3]](s32), 64
- ; VI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[INSERT1]](s96)
- ; VI: [[COPY3:%[0-9]+]]:_(s96) = COPY [[INSERT3]](s96)
- ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY2]](s96)
- ; VI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY3]](s96)
+ ; VI: [[LOAD1:%[0-9]+]]:_(s96) = G_LOAD [[PTR_ADD]](p3) :: (load 12, align 4, addrspace 3)
+ ; VI: [[COPY1:%[0-9]+]]:_(s96) = COPY [[LOAD]](s96)
+ ; VI: [[COPY2:%[0-9]+]]:_(s96) = COPY [[LOAD1]](s96)
+ ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+ ; VI: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
; GFX9-LABEL: name: test_extload_local_v2s96_from_24_align16
; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
- ; GFX9: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load 8, align 16, addrspace 3)
- ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+ ; GFX9: [[LOAD:%[0-9]+]]:_(s96) = G_LOAD [[COPY]](p3) :: (load 12, align 16, addrspace 3)
+ ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
; GFX9: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
- ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 4, align 8, addrspace 3)
- ; GFX9: [[DEF:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF
- ; GFX9: [[COPY1:%[0-9]+]]:_(s96) = COPY [[DEF]](s96)
- ; GFX9: [[INSERT:%[0-9]+]]:_(s96) = G_INSERT [[COPY1]], [[LOAD]](s64), 0
- ; GFX9: [[INSERT1:%[0-9]+]]:_(s96) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64
- ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
- ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
- ; GFX9: [[LOAD2:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p3) :: (load 8, align 4, addrspace 3)
- ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p3) = G_PTR_ADD [[PTR_ADD1]], [[C]](s32)
- ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p3) :: (load 4, addrspace 3)
- ; GFX9: [[INSERT2:%[0-9]+]]:_(s96) = G_INSERT [[DEF]], [[LOAD2]](s64), 0
- ; GFX9: [[INSERT3:%[0-9]+]]:_(s96) = G_INSERT [[INSERT2]], [[LOAD3]](s32), 64
- ; GFX9: [[COPY2:%[0-9]+]]:_(s96) = COPY [[INSERT1]](s96)
- ; GFX9: [[COPY3:%[0-9]+]]:_(s96) = COPY [[INSERT3]](s96)
- ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[COPY2]](s96)
- ; GFX9: $vgpr3_vgpr4_vgpr5 = COPY [[COPY3]](s96)
+ ; GFX9: [[LOAD1:%[0-9]+]]:_(s96) = G_LOAD [[PTR_ADD]](p3) :: (load 12, align 4, addrspace 3)
+ ; GFX9: [[COPY1:%[0-9]+]]:_(s96) = COPY [[LOAD]](s96)
+ ; GFX9: [[COPY2:%[0-9]+]]:_(s96) = COPY [[LOAD1]](s96)
+ ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96)
+ ; GFX9: $vgpr3_vgpr4_vgpr5 = COPY [[COPY2]](s96)
%0:_(p3) = COPY $vgpr0
%1:_(<2 x s96>) = G_LOAD %0 :: (load 24, align 16, addrspace 3)
%2:_(s96) = G_EXTRACT %1, 0
diff --git a/llvm/test/CodeGen/AMDGPU/concat_vectors.ll b/llvm/test/CodeGen/AMDGPU/concat_vectors.ll
index e50173b7caf8..4fe3008011a2 100644
--- a/llvm/test/CodeGen/AMDGPU/concat_vectors.ll
+++ b/llvm/test/CodeGen/AMDGPU/concat_vectors.ll
@@ -309,8 +309,8 @@ define amdgpu_kernel void @concat_vector_crash2(<8 x i8> addrspace(1)* %out, i32
; GCN-LABEL: {{^}}build_vector_splat_concat_v8i16:
; VI: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
-; VI: ds_write_b64
-; VI: ds_write2_b64
+; VI: ds_write_b128
+; VI: ds_write_b128
define amdgpu_kernel void @build_vector_splat_concat_v8i16() {
entry:
store <8 x i16> zeroinitializer, <8 x i16> addrspace(3)* undef, align 16
diff --git a/llvm/test/CodeGen/AMDGPU/ds_read2_superreg.ll b/llvm/test/CodeGen/AMDGPU/ds_read2_superreg.ll
index ef4efc6336ce..5e2fc937a86c 100644
--- a/llvm/test/CodeGen/AMDGPU/ds_read2_superreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/ds_read2_superreg.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -check-prefix=CI %s
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt,-enable-ds128 < %s | FileCheck -check-prefixes=CI,NODS128 %s
@lds = addrspace(3) global [512 x float] undef, align 4
@lds.v2 = addrspace(3) global [512 x <2 x float>] undef, align 4
diff --git a/llvm/test/CodeGen/AMDGPU/indirect-private-64.ll b/llvm/test/CodeGen/AMDGPU/indirect-private-64.ll
index 699f2060bf26..39afd17298ed 100644
--- a/llvm/test/CodeGen/AMDGPU/indirect-private-64.ll
+++ b/llvm/test/CodeGen/AMDGPU/indirect-private-64.ll
@@ -49,8 +49,8 @@ define amdgpu_kernel void @private_access_f64_alloca(double addrspace(1)* noalia
; SI-PROMOTE: ds_write_b64
; SI-PROMOTE: ds_read_b64
; SI-PROMOTE: ds_read_b64
-; CI-PROMOTE: ds_write2_b64
-; CI-PROMOTE: ds_read2_b64
+; CI-PROMOTE: ds_write_b128
+; CI-PROMOTE: ds_read_b128
define amdgpu_kernel void @private_access_v2f64_alloca(<2 x double> addrspace(1)* noalias %out, <2 x double> addrspace(1)* noalias %in, i32 %b) #1 {
%val = load <2 x double>, <2 x double> addrspace(1)* %in, align 16
%array = alloca [4 x <2 x double>], align 16, addrspace(5)
@@ -107,8 +107,8 @@ define amdgpu_kernel void @private_access_i64_alloca(i64 addrspace(1)* noalias %
; SI-PROMOTE: ds_write_b64
; SI-PROMOTE: ds_read_b64
; SI-PROMOTE: ds_read_b64
-; CI-PROMOTE: ds_write2_b64
-; CI-PROMOTE: ds_read2_b64
+; CI-PROMOTE: ds_write_b128
+; CI-PROMOTE: ds_read_b128
define amdgpu_kernel void @private_access_v2i64_alloca(<2 x i64> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %in, i32 %b) #1 {
%val = load <2 x i64>, <2 x i64> addrspace(1)* %in, align 16
%array = alloca [4 x <2 x i64>], align 16, addrspace(5)
diff --git a/llvm/test/CodeGen/AMDGPU/insert-subvector-unused-scratch.ll b/llvm/test/CodeGen/AMDGPU/insert-subvector-unused-scratch.ll
index 9a23df97d9a5..c4599ee2b353 100644
--- a/llvm/test/CodeGen/AMDGPU/insert-subvector-unused-scratch.ll
+++ b/llvm/test/CodeGen/AMDGPU/insert-subvector-unused-scratch.ll
@@ -18,9 +18,9 @@ define amdgpu_kernel void @store_v3i32(<3 x i32> addrspace(3)* %out, <3 x i32> %
; GCN-LABEL: store_v5i32:
; GCN: ds_read_b32
-; GCN: ds_read2_b64
+; GCN: ds_read_b128
; GCN: ds_write_b32
-; GCN: ds_write2_b64
+; GCN: ds_write_b128
; GCN: ScratchSize: 0
define amdgpu_kernel void @store_v5i32(<5 x i32> addrspace(3)* %out, <5 x i32> %a) nounwind {
%val = load <5 x i32>, <5 x i32> addrspace(3)* %out
@@ -28,5 +28,3 @@ define amdgpu_kernel void @store_v5i32(<5 x i32> addrspace(3)* %out, <5 x i32> %
store <5 x i32> %val.1, <5 x i32> addrspace(3)* %out, align 16
ret void
}
-
-
diff --git a/llvm/test/CodeGen/AMDGPU/load-local-f32.ll b/llvm/test/CodeGen/AMDGPU/load-local-f32.ll
index c33c00073156..a0559c17a161 100644
--- a/llvm/test/CodeGen/AMDGPU/load-local-f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-local-f32.ll
@@ -1,5 +1,5 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-enable-ds128 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefixes=EG,FUNC %s
; Testing for ds_read/write_128
diff --git a/llvm/test/CodeGen/AMDGPU/load-local-f64.ll b/llvm/test/CodeGen/AMDGPU/load-local-f64.ll
index e313b38b73d7..7495860107f5 100644
--- a/llvm/test/CodeGen/AMDGPU/load-local-f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-local-f64.ll
@@ -1,7 +1,7 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
-; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
-; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,FUNC %s
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-enable-ds128 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-enable-ds128 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-enable-ds128 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefixes=EG,FUNC %s
; Testing for ds_read_b128
diff --git a/llvm/test/CodeGen/AMDGPU/load-local-i16.ll b/llvm/test/CodeGen/AMDGPU/load-local-i16.ll
index d8d7d98e3088..357141b36a85 100644
--- a/llvm/test/CodeGen/AMDGPU/load-local-i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-local-i16.ll
@@ -1,6 +1,6 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,SI,SICIVI,FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,SICIVI,GFX89,FUNC %s
-; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GFX9,GFX89,FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-enable-ds128 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,SICIVI,GFX89,FUNC %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-enable-ds128 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GFX9,GFX89,FUNC %s
; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=EG -check-prefix=FUNC %s
; Testing for ds_read/write_b128
diff --git a/llvm/test/CodeGen/AMDGPU/load-local-i32.ll b/llvm/test/CodeGen/AMDGPU/load-local-i32.ll
index 10887885ef67..0063bb0b6d21 100644
--- a/llvm/test/CodeGen/AMDGPU/load-local-i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-local-i32.ll
@@ -1,7 +1,7 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
-; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,FUNC %s
-; RUN: llc -march=amdgcn -mcpu=gfx908 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global,-enable-ds128 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global,-enable-ds128 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,FUNC %s
+; RUN: llc -march=amdgcn -mcpu=gfx908 -mattr=-flat-for-global,-enable-ds128 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; Testing for ds_read/write_128
diff --git a/llvm/test/CodeGen/AMDGPU/load-local-i64.ll b/llvm/test/CodeGen/AMDGPU/load-local-i64.ll
index d91c3b5be928..8a07640e4f4d 100644
--- a/llvm/test/CodeGen/AMDGPU/load-local-i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-local-i64.ll
@@ -1,7 +1,7 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
-; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
-; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,FUNC %s
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-enable-ds128 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-enable-ds128 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-enable-ds128 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefixes=EG,FUNC %s
; Testing for ds_read/write_b128
diff --git a/llvm/test/CodeGen/AMDGPU/load-local-i8.ll b/llvm/test/CodeGen/AMDGPU/load-local-i8.ll
index 02477eff677b..8137ded1454d 100644
--- a/llvm/test/CodeGen/AMDGPU/load-local-i8.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-local-i8.ll
@@ -1,6 +1,6 @@
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,SI,SICIVI,FUNC %s
-; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,VI,SICIVI,FUNC %s
-; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GFX9,FUNC %s
+; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -mattr=-enable-ds128 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,VI,SICIVI,FUNC %s
+; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx900 -mattr=-enable-ds128 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GFX9,FUNC %s
; RUN: llc -march=r600 -mtriple=r600-- -mcpu=redwood -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=EG -check-prefix=FUNC %s
; Testing for ds_read/write_b128
diff --git a/llvm/test/CodeGen/AMDGPU/local-64.ll b/llvm/test/CodeGen/AMDGPU/local-64.ll
index f0dca0780aae..3e85dd5905fc 100644
--- a/llvm/test/CodeGen/AMDGPU/local-64.ll
+++ b/llvm/test/CodeGen/AMDGPU/local-64.ll
@@ -1,7 +1,7 @@
; RUN: llc -march=amdgcn -verify-machineinstrs< %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI,SICIVI %s
-; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs< %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI,SICIVI %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs< %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,SICIVI %s
-; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs< %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs< %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI,SICIVI,CIPLUS %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs< %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,SICIVI,CIPLUS %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs< %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,CIPLUS %s
; GCN-LABEL: {{^}}local_i32_load
; SICIVI: s_mov_b32 m0
@@ -165,7 +165,8 @@ define amdgpu_kernel void @local_f64_store_0_offset(double addrspace(3)* %out) n
; GFX9-NOT: m0
; GCN-NOT: add
-; GCN: ds_write2_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset0:14 offset1:15
+; SI: ds_write2_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset0:14 offset1:15
+; CIPLUS: ds_write_b128 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:112
; GCN: s_endpgm
define amdgpu_kernel void @local_v2i64_store(<2 x i64> addrspace(3)* %out) nounwind {
%gep = getelementptr <2 x i64>, <2 x i64> addrspace(3)* %out, i32 7
@@ -178,7 +179,10 @@ define amdgpu_kernel void @local_v2i64_store(<2 x i64> addrspace(3)* %out) nounw
; GFX9-NOT: m0
; GCN-NOT: add
-; GCN: ds_write2_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset1:1
+
+; SI: ds_write2_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset1:1{{$}}
+; CIPLUS: ds_write_b128 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]$}}
+
; GCN: s_endpgm
define amdgpu_kernel void @local_v2i64_store_0_offset(<2 x i64> addrspace(3)* %out) nounwind {
store <2 x i64> <i64 1234, i64 1234>, <2 x i64> addrspace(3)* %out, align 16
@@ -190,8 +194,12 @@ define amdgpu_kernel void @local_v2i64_store_0_offset(<2 x i64> addrspace(3)* %o
; GFX9-NOT: m0
; GCN-NOT: add
-; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset0:30 offset1:31
-; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset0:28 offset1:29
+; SI-DAG: ds_write2_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset0:30 offset1:31
+; SI-DAG: ds_write2_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset0:28 offset1:29
+
+; CIPLUS-DAG: ds_write_b128 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:224{{$}}
+; CIPLUS-DAG: ds_write_b128 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:240{{$}}
+
; GCN: s_endpgm
define amdgpu_kernel void @local_v4i64_store(<4 x i64> addrspace(3)* %out) nounwind {
%gep = getelementptr <4 x i64>, <4 x i64> addrspace(3)* %out, i32 7
@@ -204,8 +212,12 @@ define amdgpu_kernel void @local_v4i64_store(<4 x i64> addrspace(3)* %out) nounw
; GFX9-NOT: m0
; GCN-NOT: add
-; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset0:2 offset1:3
-; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset1:1
+; SI-DAG: ds_write2_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset0:2 offset1:3
+; SI-DAG: ds_write2_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset1:1
+
+; CIPLUS-DAG: ds_write_b128 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]$}}
+; CIPLUS-DAG: ds_write_b128 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:16{{$}}
+
; GCN: s_endpgm
define amdgpu_kernel void @local_v4i64_store_0_offset(<4 x i64> addrspace(3)* %out) nounwind {
store <4 x i64> <i64 1234, i64 1234, i64 1234, i64 1234>, <4 x i64> addrspace(3)* %out, align 16
diff --git a/llvm/test/CodeGen/AMDGPU/reorder-stores.ll b/llvm/test/CodeGen/AMDGPU/reorder-stores.ll
index 260b32ed3406..a379a646bee9 100644
--- a/llvm/test/CodeGen/AMDGPU/reorder-stores.ll
+++ b/llvm/test/CodeGen/AMDGPU/reorder-stores.ll
@@ -1,12 +1,12 @@
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn < %s | FileCheck -check-prefix=SI %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn < %s | FileCheck -check-prefixes=GCN,SI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GCN,VI %s
-; SI-LABEL: {{^}}no_reorder_v2f64_global_load_store:
-; SI: buffer_load_dwordx4
-; SI: buffer_load_dwordx4
-; SI: buffer_store_dwordx4
-; SI: buffer_store_dwordx4
-; SI: s_endpgm
+; GCN-LABEL: {{^}}no_reorder_v2f64_global_load_store:
+; GCN: buffer_load_dwordx4
+; GCN: buffer_load_dwordx4
+; GCN: buffer_store_dwordx4
+; GCN: buffer_store_dwordx4
+; GCN: s_endpgm
define amdgpu_kernel void @no_reorder_v2f64_global_load_store(<2 x double> addrspace(1)* nocapture %x, <2 x double> addrspace(1)* nocapture %y) nounwind {
%tmp1 = load <2 x double>, <2 x double> addrspace(1)* %x, align 16
%tmp4 = load <2 x double>, <2 x double> addrspace(1)* %y, align 16
@@ -15,10 +15,14 @@ define amdgpu_kernel void @no_reorder_v2f64_global_load_store(<2 x double> addrs
ret void
}
-; SI-LABEL: {{^}}no_reorder_scalarized_v2f64_local_load_store:
+; GCN-LABEL: {{^}}no_reorder_scalarized_v2f64_local_load_store:
; SI: ds_read2_b64
; SI: ds_write2_b64
-; SI: s_endpgm
+
+; VI: ds_read_b128
+; VI: ds_write_b128
+
+; GCN: s_endpgm
define amdgpu_kernel void @no_reorder_scalarized_v2f64_local_load_store(<2 x double> addrspace(3)* nocapture %x, <2 x double> addrspace(3)* nocapture %y) nounwind {
%tmp1 = load <2 x double>, <2 x double> addrspace(3)* %x, align 16
%tmp4 = load <2 x double>, <2 x double> addrspace(3)* %y, align 16
@@ -27,18 +31,18 @@ define amdgpu_kernel void @no_reorder_scalarized_v2f64_local_load_store(<2 x dou
ret void
}
-; SI-LABEL: {{^}}no_reorder_split_v8i32_global_load_store:
-; SI: buffer_load_dwordx4
-; SI: buffer_load_dwordx4
-; SI: buffer_load_dwordx4
-; SI: buffer_load_dwordx4
+; GCN-LABEL: {{^}}no_reorder_split_v8i32_global_load_store:
+; GCN: buffer_load_dwordx4
+; GCN: buffer_load_dwordx4
+; GCN: buffer_load_dwordx4
+; GCN: buffer_load_dwordx4
-; SI: buffer_store_dwordx4
-; SI: buffer_store_dwordx4
-; SI: buffer_store_dwordx4
-; SI: buffer_store_dwordx4
-; SI: s_endpgm
+; GCN: buffer_store_dwordx4
+; GCN: buffer_store_dwordx4
+; GCN: buffer_store_dwordx4
+; GCN: buffer_store_dwordx4
+; GCN: s_endpgm
define amdgpu_kernel void @no_reorder_split_v8i32_global_load_store(<8 x i32> addrspace(1)* nocapture %x, <8 x i32> addrspace(1)* nocapture %y) nounwind {
%tmp1 = load <8 x i32>, <8 x i32> addrspace(1)* %x, align 32
%tmp4 = load <8 x i32>, <8 x i32> addrspace(1)* %y, align 32
@@ -47,13 +51,13 @@ define amdgpu_kernel void @no_reorder_split_v8i32_global_load_store(<8 x i32> ad
ret void
}
-; SI-LABEL: {{^}}no_reorder_extload_64:
-; SI: ds_read_b64
-; SI: ds_read_b64
-; SI: ds_write_b64
-; SI-NOT: ds_read
-; SI: ds_write_b64
-; SI: s_endpgm
+; GCN-LABEL: {{^}}no_reorder_extload_64:
+; GCN: ds_read_b64
+; GCN: ds_read_b64
+; GCN: ds_write_b64
+; GCN-NOT: ds_read
+; GCN: ds_write_b64
+; GCN: s_endpgm
define amdgpu_kernel void @no_reorder_extload_64(<2 x i32> addrspace(3)* nocapture %x, <2 x i32> addrspace(3)* nocapture %y) nounwind {
%tmp1 = load <2 x i32>, <2 x i32> addrspace(3)* %x, align 8
%tmp4 = load <2 x i32>, <2 x i32> addrspace(3)* %y, align 8
diff --git a/llvm/test/CodeGen/AMDGPU/split-vector-memoperand-offsets.ll b/llvm/test/CodeGen/AMDGPU/split-vector-memoperand-offsets.ll
index c4db4c5734a5..6cecc23a8810 100644
--- a/llvm/test/CodeGen/AMDGPU/split-vector-memoperand-offsets.ll
+++ b/llvm/test/CodeGen/AMDGPU/split-vector-memoperand-offsets.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -mcpu=hawaii -enable-amdgpu-aa=0 -verify-machineinstrs -mattr=-promote-alloca,-load-store-opt < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=hawaii -enable-amdgpu-aa=0 -verify-machineinstrs -mattr=-promote-alloca,-load-store-opt,-enable-ds128 < %s | FileCheck -check-prefix=GCN %s
@sPrivateStorage = internal addrspace(3) global [256 x [8 x <4 x i64>]] undef
diff --git a/llvm/test/CodeGen/AMDGPU/store-local.ll b/llvm/test/CodeGen/AMDGPU/store-local.ll
index f0e76ac59510..f302ea099b75 100644
--- a/llvm/test/CodeGen/AMDGPU/store-local.ll
+++ b/llvm/test/CodeGen/AMDGPU/store-local.ll
@@ -1,5 +1,5 @@
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
-; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
+; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,VI,FUNC %s
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,FUNC %s
; RUN: llc -march=r600 -mtriple=r600-- -mcpu=redwood < %s | FileCheck -check-prefixes=EG,FUNC %s
; RUN: llc -march=r600 -mtriple=r600-- -mcpu=cayman < %s | FileCheck -check-prefixes=CM,FUNC %s
@@ -156,7 +156,9 @@ entry:
; CM: LDS_WRITE
; CM: LDS_WRITE
-; GCN: ds_write2_b64
+; SI: ds_write2_b32
+; VI: ds_write_b128
+; GFX9: ds_write_b128
define amdgpu_kernel void @store_local_v4i32(<4 x i32> addrspace(3)* %out, <4 x i32> %in) {
entry:
store <4 x i32> %in, <4 x i32> addrspace(3)* %out
diff --git a/llvm/test/CodeGen/AMDGPU/store-v3i64.ll b/llvm/test/CodeGen/AMDGPU/store-v3i64.ll
index 7af1736b3207..b2e69deb73cf 100644
--- a/llvm/test/CodeGen/AMDGPU/store-v3i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/store-v3i64.ll
@@ -46,8 +46,14 @@ define amdgpu_kernel void @global_store_v3i64_unaligned(<3 x i64> addrspace(1)*
}
; GCN-LABEL: {{^}}local_store_v3i64:
-; GCN: ds_write2_b64
-; GCN: ds_write_b64
+; SI: ds_write2_b64
+; SI: ds_write_b64
+
+; CI: ds_write_b64
+; CI: ds_write_b128
+
+; VI: ds_write_b64
+; VI: ds_write_b128
define amdgpu_kernel void @local_store_v3i64(<3 x i64> addrspace(3)* %out, <3 x i64> %x) {
store <3 x i64> %x, <3 x i64> addrspace(3)* %out, align 32
ret void
More information about the llvm-commits
mailing list