[llvm] 27f8bd9 - AMDGPU/GlobalISel: Fix select of <8 x s64> scalar load
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 11 16:09:50 PDT 2020
Author: Matt Arsenault
Date: 2020-06-11T19:09:43-04:00
New Revision: 27f8bd94cb99ad711d2b13147d5dfc6b9037fe4b
URL: https://github.com/llvm/llvm-project/commit/27f8bd94cb99ad711d2b13147d5dfc6b9037fe4b
DIFF: https://github.com/llvm/llvm-project/commit/27f8bd94cb99ad711d2b13147d5dfc6b9037fe4b.diff
LOG: AMDGPU/GlobalISel: Fix select of <8 x s64> scalar load
Added:
Modified:
llvm/lib/Target/AMDGPU/SMInstructions.td
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-constant.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SMInstructions.td b/llvm/lib/Target/AMDGPU/SMInstructions.td
index 1fdb53bbf394..5148a3028edf 100644
--- a/llvm/lib/Target/AMDGPU/SMInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SMInstructions.td
@@ -829,7 +829,10 @@ defm : SMRD_Pattern <"S_LOAD_DWORDX4", vt>;
foreach vt = SReg_256.RegTypes in {
defm : SMRD_Pattern <"S_LOAD_DWORDX8", vt>;
}
-defm : SMRD_Pattern <"S_LOAD_DWORDX16", v16i32>;
+
+foreach vt = SReg_512.RegTypes in {
+defm : SMRD_Pattern <"S_LOAD_DWORDX16", vt>;
+}
defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORD", i32>;
defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX2", v2i32>;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-constant.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-constant.mir
index 309bd1101a86..080189ab0dbe 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-constant.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-constant.mir
@@ -683,6 +683,120 @@ body: |
...
+---
+
+name: load_constant_v8s32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1
+
+ ; GFX6-LABEL: name: load_constant_v8s32
+ ; GFX6: liveins: $sgpr0_sgpr1
+ ; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX6: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM [[COPY]], 0, 0, 0 :: (load 32, align 4, addrspace 4)
+ ; GFX6: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY [[S_LOAD_DWORDX8_IMM]]
+ ; GFX7-LABEL: name: load_constant_v8s32
+ ; GFX7: liveins: $sgpr0_sgpr1
+ ; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX7: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM [[COPY]], 0, 0, 0 :: (load 32, align 4, addrspace 4)
+ ; GFX7: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY [[S_LOAD_DWORDX8_IMM]]
+ ; GFX8-LABEL: name: load_constant_v8s32
+ ; GFX8: liveins: $sgpr0_sgpr1
+ ; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX8: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM [[COPY]], 0, 0, 0 :: (load 32, align 4, addrspace 4)
+ ; GFX8: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY [[S_LOAD_DWORDX8_IMM]]
+ ; GFX10-LABEL: name: load_constant_v8s32
+ ; GFX10: liveins: $sgpr0_sgpr1
+ ; GFX10: $vcc_hi = IMPLICIT_DEF
+ ; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX10: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM [[COPY]], 0, 0, 0 :: (load 32, align 4, addrspace 4)
+ ; GFX10: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY [[S_LOAD_DWORDX8_IMM]]
+ %0:sgpr(p4) = COPY $sgpr0_sgpr1
+ %1:sgpr(<8 x s32>) = G_LOAD %0 :: (load 32, align 4, addrspace 4)
+ $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY %1
+
+...
+
+---
+
+name: load_constant_v16s32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1
+
+ ; GFX6-LABEL: name: load_constant_v16s32
+ ; GFX6: liveins: $sgpr0_sgpr1
+ ; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX6: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0, 0 :: (load 64, align 4, addrspace 4)
+ ; GFX6: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[S_LOAD_DWORDX16_IMM]]
+ ; GFX7-LABEL: name: load_constant_v16s32
+ ; GFX7: liveins: $sgpr0_sgpr1
+ ; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX7: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0, 0 :: (load 64, align 4, addrspace 4)
+ ; GFX7: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[S_LOAD_DWORDX16_IMM]]
+ ; GFX8-LABEL: name: load_constant_v16s32
+ ; GFX8: liveins: $sgpr0_sgpr1
+ ; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX8: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0, 0 :: (load 64, align 4, addrspace 4)
+ ; GFX8: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[S_LOAD_DWORDX16_IMM]]
+ ; GFX10-LABEL: name: load_constant_v16s32
+ ; GFX10: liveins: $sgpr0_sgpr1
+ ; GFX10: $vcc_hi = IMPLICIT_DEF
+ ; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX10: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0, 0 :: (load 64, align 4, addrspace 4)
+ ; GFX10: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[S_LOAD_DWORDX16_IMM]]
+ %0:sgpr(p4) = COPY $sgpr0_sgpr1
+ %1:sgpr(<16 x s32>) = G_LOAD %0 :: (load 64, align 4, addrspace 4)
+ $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY %1
+
+...
+
+---
+
+name: load_constant_v8s64
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1
+
+ ; GFX6-LABEL: name: load_constant_v8s64
+ ; GFX6: liveins: $sgpr0_sgpr1
+ ; GFX6: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX6: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0, 0 :: (load 64, align 4, addrspace 4)
+ ; GFX6: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[S_LOAD_DWORDX16_IMM]]
+ ; GFX7-LABEL: name: load_constant_v8s64
+ ; GFX7: liveins: $sgpr0_sgpr1
+ ; GFX7: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX7: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0, 0 :: (load 64, align 4, addrspace 4)
+ ; GFX7: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[S_LOAD_DWORDX16_IMM]]
+ ; GFX8-LABEL: name: load_constant_v8s64
+ ; GFX8: liveins: $sgpr0_sgpr1
+ ; GFX8: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX8: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0, 0 :: (load 64, align 4, addrspace 4)
+ ; GFX8: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[S_LOAD_DWORDX16_IMM]]
+ ; GFX10-LABEL: name: load_constant_v8s64
+ ; GFX10: liveins: $sgpr0_sgpr1
+ ; GFX10: $vcc_hi = IMPLICIT_DEF
+ ; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+ ; GFX10: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM [[COPY]], 0, 0, 0 :: (load 64, align 4, addrspace 4)
+ ; GFX10: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY [[S_LOAD_DWORDX16_IMM]]
+ %0:sgpr(p4) = COPY $sgpr0_sgpr1
+ %1:sgpr(<8 x s64>) = G_LOAD %0 :: (load 64, align 4, addrspace 4)
+ $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY %1
+
+...
+
################################################################################
### Stress addressing modes
################################################################################
More information about the llvm-commits
mailing list