[llvm] 1a7f115 - AMDGPU/GlobalISel: Extend load/store workaround to i128 vectors
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 15 11:55:18 PDT 2020
Author: Matt Arsenault
Date: 2020-06-15T14:55:11-04:00
New Revision: 1a7f115dce22b2c09fdd4f7f79d24da5de6eaef8
URL: https://github.com/llvm/llvm-project/commit/1a7f115dce22b2c09fdd4f7f79d24da5de6eaef8
DIFF: https://github.com/llvm/llvm-project/commit/1a7f115dce22b2c09fdd4f7f79d24da5de6eaef8.diff
LOG: AMDGPU/GlobalISel: Extend load/store workaround to i128 vectors
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index ad0c43cceeec..2f701ca04d78 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -307,7 +307,8 @@ static bool loadStoreBitcastWorkaround(const LLT Ty) {
return false;
if (!Ty.isVector())
return true;
- return Ty.getElementType().getSizeInBits() == 16;
+ unsigned EltSize = Ty.getElementType().getSizeInBits();
+ return EltSize != 32 && EltSize != 64;
}
static bool isLoadStoreLegal(const GCNSubtarget &ST, const LegalityQuery &Query,
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir
index a6c78a0d6e23..b85f3ad94c63 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir
@@ -10377,24 +10377,29 @@ body: |
; CI-LABEL: name: test_load_constant_v2s128_align32
; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
- ; CI: [[LOAD:%[0-9]+]]:_(<2 x s128>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4)
- ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<2 x s128>)
+ ; CI: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4)
+ ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[LOAD]](<8 x s32>)
+ ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>)
; VI-LABEL: name: test_load_constant_v2s128_align32
; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
- ; VI: [[LOAD:%[0-9]+]]:_(<2 x s128>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4)
- ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<2 x s128>)
+ ; VI: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4)
+ ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[LOAD]](<8 x s32>)
+ ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>)
; GFX9-LABEL: name: test_load_constant_v2s128_align32
; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
- ; GFX9: [[LOAD:%[0-9]+]]:_(<2 x s128>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4)
- ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<2 x s128>)
+ ; GFX9: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4)
+ ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[LOAD]](<8 x s32>)
+ ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>)
; CI-MESA-LABEL: name: test_load_constant_v2s128_align32
; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
- ; CI-MESA: [[LOAD:%[0-9]+]]:_(<2 x s128>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4)
- ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<2 x s128>)
+ ; CI-MESA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4)
+ ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[LOAD]](<8 x s32>)
+ ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>)
; GFX9-MESA-LABEL: name: test_load_constant_v2s128_align32
; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
- ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<2 x s128>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4)
- ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<2 x s128>)
+ ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load 32, addrspace 4)
+ ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[LOAD]](<8 x s32>)
+ ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>)
%0:_(p4) = COPY $vgpr0_vgpr1
%1:_(<2 x s128>) = G_LOAD %0 :: (load 32, align 32, addrspace 4)
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1
@@ -13259,3 +13264,39 @@ body: |
%1:_(s512) = G_LOAD %0 :: (load 64, align 32, addrspace 4)
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1
...
+
+---
+name: test_load_constant_v4s128_align32
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+
+ ; CI-LABEL: name: test_load_constant_v4s128_align32
+ ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
+ ; CI: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load 64, align 32, addrspace 4)
+ ; CI: [[BITCAST:%[0-9]+]]:_(<4 x s128>) = G_BITCAST [[LOAD]](<16 x s32>)
+ ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST]](<4 x s128>)
+ ; VI-LABEL: name: test_load_constant_v4s128_align32
+ ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
+ ; VI: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load 64, align 32, addrspace 4)
+ ; VI: [[BITCAST:%[0-9]+]]:_(<4 x s128>) = G_BITCAST [[LOAD]](<16 x s32>)
+ ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST]](<4 x s128>)
+ ; GFX9-LABEL: name: test_load_constant_v4s128_align32
+ ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
+ ; GFX9: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load 64, align 32, addrspace 4)
+ ; GFX9: [[BITCAST:%[0-9]+]]:_(<4 x s128>) = G_BITCAST [[LOAD]](<16 x s32>)
+ ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST]](<4 x s128>)
+ ; CI-MESA-LABEL: name: test_load_constant_v4s128_align32
+ ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
+ ; CI-MESA: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load 64, align 32, addrspace 4)
+ ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<4 x s128>) = G_BITCAST [[LOAD]](<16 x s32>)
+ ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST]](<4 x s128>)
+ ; GFX9-MESA-LABEL: name: test_load_constant_v4s128_align32
+ ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
+ ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load 64, align 32, addrspace 4)
+ ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(<4 x s128>) = G_BITCAST [[LOAD]](<16 x s32>)
+ ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST]](<4 x s128>)
+ %0:_(p4) = COPY $vgpr0_vgpr1
+ %1:_(<4 x s128>) = G_LOAD %0 :: (load 64, align 32, addrspace 4)
+ $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir
index 1f936edff1d4..aaa665e9984b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir
@@ -10477,53 +10477,48 @@ body: |
; CI-LABEL: name: test_load_flat_v2s128_align32
; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
; CI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 32)
- ; CI: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
; CI: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load 16 + 16)
- ; CI: [[BITCAST1:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD1]](<4 x s32>)
- ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s128>) = G_BUILD_VECTOR [[BITCAST]](s128), [[BITCAST1]](s128)
- ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<2 x s128>)
+ ; CI: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>)
+ ; CI: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[CONCAT_VECTORS]](<8 x s32>)
+ ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>)
; VI-LABEL: name: test_load_flat_v2s128_align32
; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
; VI: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 32)
- ; VI: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
; VI: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load 16 + 16)
- ; VI: [[BITCAST1:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD1]](<4 x s32>)
- ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s128>) = G_BUILD_VECTOR [[BITCAST]](s128), [[BITCAST1]](s128)
- ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<2 x s128>)
+ ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>)
+ ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[CONCAT_VECTORS]](<8 x s32>)
+ ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>)
; GFX9-LABEL: name: test_load_flat_v2s128_align32
; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 32)
- ; GFX9: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
; GFX9: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load 16 + 16)
- ; GFX9: [[BITCAST1:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD1]](<4 x s32>)
- ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s128>) = G_BUILD_VECTOR [[BITCAST]](s128), [[BITCAST1]](s128)
- ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<2 x s128>)
+ ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>)
+ ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[CONCAT_VECTORS]](<8 x s32>)
+ ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>)
; CI-MESA-LABEL: name: test_load_flat_v2s128_align32
; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 32)
- ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
; CI-MESA: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load 16 + 16)
- ; CI-MESA: [[BITCAST1:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD1]](<4 x s32>)
- ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s128>) = G_BUILD_VECTOR [[BITCAST]](s128), [[BITCAST1]](s128)
- ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<2 x s128>)
+ ; CI-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>)
+ ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[CONCAT_VECTORS]](<8 x s32>)
+ ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>)
; GFX9-MESA-LABEL: name: test_load_flat_v2s128_align32
; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p0) :: (load 16, align 32)
- ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD]](<4 x s32>)
; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load 16 + 16)
- ; GFX9-MESA: [[BITCAST1:%[0-9]+]]:_(s128) = G_BITCAST [[LOAD1]](<4 x s32>)
- ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s128>) = G_BUILD_VECTOR [[BITCAST]](s128), [[BITCAST1]](s128)
- ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<2 x s128>)
+ ; GFX9-MESA: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>)
+ ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[CONCAT_VECTORS]](<8 x s32>)
+ ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>)
%0:_(p0) = COPY $vgpr0_vgpr1
%1:_(<2 x s128>) = G_LOAD %0 :: (load 32, align 32, addrspace 0)
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir
index b80a6edd427f..d16e9631e765 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir
@@ -9916,28 +9916,34 @@ body: |
; SI-LABEL: name: test_load_global_v2s128_align32
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
- ; SI: [[LOAD:%[0-9]+]]:_(<2 x s128>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1)
- ; SI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<2 x s128>)
+ ; SI: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1)
+ ; SI: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[LOAD]](<8 x s32>)
+ ; SI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>)
; CI-HSA-LABEL: name: test_load_global_v2s128_align32
; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
- ; CI-HSA: [[LOAD:%[0-9]+]]:_(<2 x s128>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1)
- ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<2 x s128>)
+ ; CI-HSA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1)
+ ; CI-HSA: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[LOAD]](<8 x s32>)
+ ; CI-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>)
; CI-MESA-LABEL: name: test_load_global_v2s128_align32
; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
- ; CI-MESA: [[LOAD:%[0-9]+]]:_(<2 x s128>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1)
- ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<2 x s128>)
+ ; CI-MESA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1)
+ ; CI-MESA: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[LOAD]](<8 x s32>)
+ ; CI-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>)
; VI-LABEL: name: test_load_global_v2s128_align32
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
- ; VI: [[LOAD:%[0-9]+]]:_(<2 x s128>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1)
- ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<2 x s128>)
+ ; VI: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1)
+ ; VI: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[LOAD]](<8 x s32>)
+ ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>)
; GFX9-HSA-LABEL: name: test_load_global_v2s128_align32
; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
- ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<2 x s128>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1)
- ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<2 x s128>)
+ ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1)
+ ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[LOAD]](<8 x s32>)
+ ; GFX9-HSA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>)
; GFX9-MESA-LABEL: name: test_load_global_v2s128_align32
; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
- ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<2 x s128>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1)
- ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[LOAD]](<2 x s128>)
+ ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[COPY]](p1) :: (load 32, addrspace 1)
+ ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(<2 x s128>) = G_BITCAST [[LOAD]](<8 x s32>)
+ ; GFX9-MESA: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<2 x s128>)
%0:_(p1) = COPY $vgpr0_vgpr1
%1:_(<2 x s128>) = G_LOAD %0 :: (load 32, align 32, addrspace 1)
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir
index e6fd8609b911..5ebbd48f06ed 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir
@@ -6770,45 +6770,49 @@ body: |
...
---
-name: test_store_global_v8s32_align32
+name: test_store_global_v2s128_align32
body: |
bb.0:
liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
- ; SI-LABEL: name: test_store_global_v8s32_align32
+ ; SI-LABEL: name: test_store_global_v2s128_align32
; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
- ; SI: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
- ; SI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>)
+ ; SI: [[COPY1:%[0-9]+]]:_(<2 x s128>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
+ ; SI: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](<2 x s128>)
+ ; SI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>)
; SI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store 16, align 32, addrspace 1)
; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; SI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store 16 + 16, addrspace 1)
- ; CI-LABEL: name: test_store_global_v8s32_align32
+ ; CI-LABEL: name: test_store_global_v2s128_align32
; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
- ; CI: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
- ; CI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>)
+ ; CI: [[COPY1:%[0-9]+]]:_(<2 x s128>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
+ ; CI: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](<2 x s128>)
+ ; CI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>)
; CI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store 16, align 32, addrspace 1)
; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; CI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store 16 + 16, addrspace 1)
- ; VI-LABEL: name: test_store_global_v8s32_align32
+ ; VI-LABEL: name: test_store_global_v2s128_align32
; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
- ; VI: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
- ; VI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>)
+ ; VI: [[COPY1:%[0-9]+]]:_(<2 x s128>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
+ ; VI: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](<2 x s128>)
+ ; VI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>)
; VI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store 16, align 32, addrspace 1)
; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; VI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store 16 + 16, addrspace 1)
- ; GFX9-LABEL: name: test_store_global_v8s32_align32
+ ; GFX9-LABEL: name: test_store_global_v2s128_align32
; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
- ; GFX9: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
- ; GFX9: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>)
+ ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s128>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
+ ; GFX9: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY1]](<2 x s128>)
+ ; GFX9: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s32>)
; GFX9: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store 16, align 32, addrspace 1)
; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; GFX9: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store 16 + 16, addrspace 1)
%0:_(p1) = COPY $vgpr0_vgpr1
- %1:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
+ %1:_(<2 x s128>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
G_STORE %1, %0 :: (store 32, align 32, addrspace 1)
...
@@ -7472,6 +7476,49 @@ body: |
G_STORE %1, %0 :: (store 32, align 32, addrspace 1)
...
+---
+name: test_store_global_v8s32_align32
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
+
+ ; SI-LABEL: name: test_store_global_v8s32_align32
+ ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+ ; SI: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
+ ; SI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>)
+ ; SI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store 16, align 32, addrspace 1)
+ ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+ ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+ ; SI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store 16 + 16, addrspace 1)
+ ; CI-LABEL: name: test_store_global_v8s32_align32
+ ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+ ; CI: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
+ ; CI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>)
+ ; CI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store 16, align 32, addrspace 1)
+ ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+ ; CI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+ ; CI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store 16 + 16, addrspace 1)
+ ; VI-LABEL: name: test_store_global_v8s32_align32
+ ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+ ; VI: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
+ ; VI: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>)
+ ; VI: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store 16, align 32, addrspace 1)
+ ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+ ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+ ; VI: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store 16 + 16, addrspace 1)
+ ; GFX9-LABEL: name: test_store_global_v8s32_align32
+ ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+ ; GFX9: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
+ ; GFX9: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY1]](<8 x s32>)
+ ; GFX9: G_STORE [[UV]](<4 x s32>), [[COPY]](p1) :: (store 16, align 32, addrspace 1)
+ ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+ ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+ ; GFX9: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD]](p1) :: (store 16 + 16, addrspace 1)
+ %0:_(p1) = COPY $vgpr0_vgpr1
+ %1:_(<8 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9
+ G_STORE %1, %0 :: (store 32, align 32, addrspace 1)
+...
+
---
name: test_store_global_v9s32_align1
body: |
More information about the llvm-commits
mailing list