[llvm] d13ce17 - AMDGPU/GlobalISel: Add regbankselect test for vgpr(dest) sgpr(address) load
Petar Avramovic via llvm-commits
llvm-commits at lists.llvm.org
Mon May 10 01:20:38 PDT 2021
Author: Petar Avramovic
Date: 2021-05-10T10:18:30+02:00
New Revision: d13ce17bb4008b2907e6e85882a9295dce9f6b0a
URL: https://github.com/llvm/llvm-project/commit/d13ce17bb4008b2907e6e85882a9295dce9f6b0a
DIFF: https://github.com/llvm/llvm-project/commit/d13ce17bb4008b2907e6e85882a9295dce9f6b0a.diff
LOG: AMDGPU/GlobalISel: Add regbankselect test for vgpr(dest) sgpr(address) load
Pre-commit for D101992.
Added:
llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-uniform-load-noclobber.mir
Modified:
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-uniform-load-noclobber.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-uniform-load-noclobber.mir
new file mode 100644
index 0000000000000..e4d1fc17453e5
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-uniform-load-noclobber.mir
@@ -0,0 +1,125 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=kaveri -run-pass=regbankselect -mattr=+unaligned-access-mode %s -verify-machineinstrs -o - | FileCheck -check-prefixes=GFX7 %s
+# RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -run-pass=regbankselect -mattr=+unaligned-access-mode %s -verify-machineinstrs -o - | FileCheck -check-prefixes=GFX1010 %s
+
+---
+name: test_uniform_load_without_noclobber
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.1.entry:
+ liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+
+ ; GFX7-LABEL: name: test_uniform_load_without_noclobber
+ ; GFX7: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+ ; GFX7: %in_addr:sgpr(p1) = COPY $sgpr0_sgpr1
+ ; GFX7: %out_addr:sgpr(p1) = COPY $sgpr2_sgpr3
+ ; GFX7: %load:vgpr(<16 x s32>) = G_LOAD %in_addr(p1) :: (load 64, align 4, addrspace 1)
+ ; GFX7: %load0_3:vgpr(<4 x s32>), %load4_7:vgpr(<4 x s32>), %load8_11:vgpr(<4 x s32>), %load12_15:vgpr(<4 x s32>) = G_UNMERGE_VALUES %load(<16 x s32>)
+ ; GFX7: G_STORE %load0_3(<4 x s32>), %out_addr(p1) :: (store 16, align 4, addrspace 1)
+ ; GFX7: %cst16:sgpr(s64) = G_CONSTANT i64 16
+ ; GFX7: %out_addr_plus_16:sgpr(p1) = G_PTR_ADD %out_addr, %cst16(s64)
+ ; GFX7: G_STORE %load4_7(<4 x s32>), %out_addr_plus_16(p1) :: (store 16, align 4, addrspace 1)
+ ; GFX7: %cst32:sgpr(s64) = G_CONSTANT i64 32
+ ; GFX7: %out_addr_plus_32:sgpr(p1) = G_PTR_ADD %out_addr, %cst32(s64)
+ ; GFX7: G_STORE %load8_11(<4 x s32>), %out_addr_plus_32(p1) :: (store 16, align 4, addrspace 1)
+ ; GFX7: %cst48:sgpr(s64) = G_CONSTANT i64 48
+ ; GFX7: %out_addr_plus_48:sgpr(p1) = G_PTR_ADD %out_addr, %cst48(s64)
+ ; GFX7: G_STORE %load12_15(<4 x s32>), %out_addr_plus_48(p1) :: (store 16, align 4, addrspace 1)
+ ; GFX7: S_ENDPGM 0
+ ; GFX1010-LABEL: name: test_uniform_load_without_noclobber
+ ; GFX1010: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+ ; GFX1010: %in_addr:sgpr(p1) = COPY $sgpr0_sgpr1
+ ; GFX1010: %out_addr:sgpr(p1) = COPY $sgpr2_sgpr3
+ ; GFX1010: [[COPY:%[0-9]+]]:vgpr(p1) = COPY %in_addr(p1)
+ ; GFX1010: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD %in_addr(p1) :: (load 16, align 4, addrspace 1)
+ ; GFX1010: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16
+ ; GFX1010: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD %in_addr, [[C]](s64)
+ ; GFX1010: [[LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load 16 from unknown-address + 16, align 4, addrspace 1)
+ ; GFX1010: [[C1:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 32
+ ; GFX1010: [[PTR_ADD1:%[0-9]+]]:vgpr(p1) = G_PTR_ADD %in_addr, [[C1]](s64)
+ ; GFX1010: [[LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD1]](p1) :: (load 16 from unknown-address + 32, align 4, addrspace 1)
+ ; GFX1010: [[C2:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 48
+ ; GFX1010: [[PTR_ADD2:%[0-9]+]]:vgpr(p1) = G_PTR_ADD %in_addr, [[C2]](s64)
+ ; GFX1010: [[LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD2]](p1) :: (load 16 from unknown-address + 48, align 4, addrspace 1)
+ ; GFX1010: %load:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>)
+ ; GFX1010: %load0_3:vgpr(<4 x s32>), %load4_7:vgpr(<4 x s32>), %load8_11:vgpr(<4 x s32>), %load12_15:vgpr(<4 x s32>) = G_UNMERGE_VALUES %load(<16 x s32>)
+ ; GFX1010: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY %out_addr(p1)
+ ; GFX1010: G_STORE %load0_3(<4 x s32>), [[COPY1]](p1) :: (store 16, align 4, addrspace 1)
+ ; GFX1010: %cst16:sgpr(s64) = G_CONSTANT i64 16
+ ; GFX1010: %out_addr_plus_16:sgpr(p1) = G_PTR_ADD %out_addr, %cst16(s64)
+ ; GFX1010: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY %out_addr_plus_16(p1)
+ ; GFX1010: G_STORE %load4_7(<4 x s32>), [[COPY2]](p1) :: (store 16, align 4, addrspace 1)
+ ; GFX1010: %cst32:sgpr(s64) = G_CONSTANT i64 32
+ ; GFX1010: %out_addr_plus_32:sgpr(p1) = G_PTR_ADD %out_addr, %cst32(s64)
+ ; GFX1010: [[COPY3:%[0-9]+]]:vgpr(p1) = COPY %out_addr_plus_32(p1)
+ ; GFX1010: G_STORE %load8_11(<4 x s32>), [[COPY3]](p1) :: (store 16, align 4, addrspace 1)
+ ; GFX1010: %cst48:sgpr(s64) = G_CONSTANT i64 48
+ ; GFX1010: %out_addr_plus_48:sgpr(p1) = G_PTR_ADD %out_addr, %cst48(s64)
+ ; GFX1010: [[COPY4:%[0-9]+]]:vgpr(p1) = COPY %out_addr_plus_48(p1)
+ ; GFX1010: G_STORE %load12_15(<4 x s32>), [[COPY4]](p1) :: (store 16, align 4, addrspace 1)
+ ; GFX1010: S_ENDPGM 0
+ %in_addr:_(p1) = COPY $sgpr0_sgpr1 ; test input: source pointer (p1) is taken from an SGPR pair
+ %out_addr:_(p1) = COPY $sgpr2_sgpr3 ; destination pointer (p1), also taken from an SGPR pair
+ %load:_(<16 x s32>) = G_LOAD %in_addr(p1) :: (load 64, align 4, addrspace 1) ; one 64-byte load from the SGPR address; the expected output assigns the result to vgpr on both targets, kept whole on GFX7 and split into four 16-byte loads plus G_CONCAT_VECTORS on GFX1010. NOTE(review): the GFX1010 expectations still pass the sgpr %in_addr to the split loads/ptr-adds even though a vgpr COPY is emitted - presumably what the follow-up D101992 changes; confirm
+ %load0_3:_(<4 x s32>), %load4_7:_(<4 x s32>), %load8_11:_(<4 x s32>), %load12_15:_(<4 x s32>) = G_UNMERGE_VALUES %load(<16 x s32>) ; split the loaded vector into four <4 x s32> quarters, one per store below
+ G_STORE %load0_3(<4 x s32>), %out_addr(p1) :: (store 16, align 4, addrspace 1) ; quarter 0 -> out_addr + 0
+ %cst16:_(s64) = G_CONSTANT i64 16
+ %out_addr_plus_16:_(p1) = G_PTR_ADD %out_addr, %cst16(s64)
+ G_STORE %load4_7(<4 x s32>), %out_addr_plus_16(p1) :: (store 16, align 4, addrspace 1) ; quarter 1 -> out_addr + 16
+ %cst32:_(s64) = G_CONSTANT i64 32
+ %out_addr_plus_32:_(p1) = G_PTR_ADD %out_addr, %cst32(s64)
+ G_STORE %load8_11(<4 x s32>), %out_addr_plus_32(p1) :: (store 16, align 4, addrspace 1) ; quarter 2 -> out_addr + 32
+ %cst48:_(s64) = G_CONSTANT i64 48
+ %out_addr_plus_48:_(p1) = G_PTR_ADD %out_addr, %cst48(s64)
+ G_STORE %load12_15(<4 x s32>), %out_addr_plus_48(p1) :: (store 16, align 4, addrspace 1) ; quarter 3 -> out_addr + 48
+ S_ENDPGM 0
+...
+
+---
+name: test_s_load_constant_v8i32_align1
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+
+ ; GFX7-LABEL: name: test_s_load_constant_v8i32_align1
+ ; GFX7: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+ ; GFX7: %ptr:sgpr(p4) = COPY $sgpr0_sgpr1
+ ; GFX7: %out:sgpr(p1) = COPY $sgpr2_sgpr3
+ ; GFX7: %load:vgpr(<8 x s32>) = G_LOAD %ptr(p4) :: (load 32, align 1, addrspace 4)
+ ; GFX7: %load0_3:vgpr(<4 x s32>), %load4_7:vgpr(<4 x s32>) = G_UNMERGE_VALUES %load(<8 x s32>)
+ ; GFX7: G_STORE %load0_3(<4 x s32>), %out(p1) :: (store 16, align 32, addrspace 1)
+ ; GFX7: %cst_16:sgpr(s64) = G_CONSTANT i64 16
+ ; GFX7: %out_plus_16:sgpr(p1) = G_PTR_ADD %out, %cst_16(s64)
+ ; GFX7: G_STORE %load4_7(<4 x s32>), %out_plus_16(p1) :: (store 16, align 32, addrspace 1)
+ ; GFX7: S_ENDPGM 0
+ ; GFX1010-LABEL: name: test_s_load_constant_v8i32_align1
+ ; GFX1010: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+ ; GFX1010: %ptr:sgpr(p4) = COPY $sgpr0_sgpr1
+ ; GFX1010: %out:sgpr(p1) = COPY $sgpr2_sgpr3
+ ; GFX1010: [[COPY:%[0-9]+]]:vgpr(p4) = COPY %ptr(p4)
+ ; GFX1010: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD %ptr(p4) :: (load 16, align 1, addrspace 4)
+ ; GFX1010: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16
+ ; GFX1010: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD %ptr, [[C]](s64)
+ ; GFX1010: [[LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load 16 from unknown-address + 16, align 1, addrspace 4)
+ ; GFX1010: %load:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>)
+ ; GFX1010: %load0_3:vgpr(<4 x s32>), %load4_7:vgpr(<4 x s32>) = G_UNMERGE_VALUES %load(<8 x s32>)
+ ; GFX1010: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY %out(p1)
+ ; GFX1010: G_STORE %load0_3(<4 x s32>), [[COPY1]](p1) :: (store 16, align 32, addrspace 1)
+ ; GFX1010: %cst_16:sgpr(s64) = G_CONSTANT i64 16
+ ; GFX1010: %out_plus_16:sgpr(p1) = G_PTR_ADD %out, %cst_16(s64)
+ ; GFX1010: [[COPY2:%[0-9]+]]:vgpr(p1) = COPY %out_plus_16(p1)
+ ; GFX1010: G_STORE %load4_7(<4 x s32>), [[COPY2]](p1) :: (store 16, align 32, addrspace 1)
+ ; GFX1010: S_ENDPGM 0
+ %ptr:_(p4) = COPY $sgpr0_sgpr1 ; test input: address-space-4 source pointer is taken from an SGPR pair
+ %out:_(p1) = COPY $sgpr2_sgpr3 ; address-space-1 destination pointer, also taken from an SGPR pair
+ %load:_(<8 x s32>) = G_LOAD %ptr(p4) :: (load 32, align 1, addrspace 4) ; 32-byte load with only 1-byte alignment; the expected output assigns the result to vgpr on both targets, kept whole on GFX7 and split into two 16-byte loads plus G_CONCAT_VECTORS on GFX1010
+ %load0_3:_(<4 x s32>), %load4_7:_(<4 x s32>) = G_UNMERGE_VALUES %load(<8 x s32>) ; split the loaded vector into its low and high <4 x s32> halves
+ G_STORE %load0_3(<4 x s32>), %out(p1) :: (store 16, align 32, addrspace 1) ; low half -> out + 0
+ %cst_16:_(s64) = G_CONSTANT i64 16
+ %out_plus_16:_(p1) = G_PTR_ADD %out, %cst_16(s64)
+ G_STORE %load4_7(<4 x s32>), %out_plus_16(p1) :: (store 16, basealign 32, addrspace 1) ; high half -> out + 16; written with basealign 32 here while the autogenerated expectations show align 32 - presumably the printer's normalised form rather than a typo, TODO confirm
+ S_ENDPGM 0
+...
More information about the llvm-commits mailing list