[PATCH] D74518: [AMDGPU][GlobalISel] Handle 64-byte EltSize in getRegSplitParts
Austin Kerbow via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 12 17:34:57 PST 2020
kerbowa created this revision.
kerbowa added a reviewer: arsenm.
Herald added subscribers: llvm-commits, hiraditya, t-tye, tpr, dstuttard, rovka, yaxunl, nhaehnle, wdng, jvesely, kzhuravl.
Herald added a project: LLVM.
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D74518
Files:
llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-concat-vectors.mir
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-concat-vectors.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-concat-vectors.mir
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-concat-vectors.mir
@@ -586,6 +586,26 @@
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %8
...
+---
+name: test_concat_vectors_s_v32s32_s_v16s32_s_v16s32
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
+
+ ; GCN-LABEL: name: test_concat_vectors_s_v32s32_s_v16s32_s_v16s32
+ ; GCN: [[COPY:%[0-9]+]]:sreg_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+ ; GCN: [[COPY1:%[0-9]+]]:sreg_512 = COPY $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
+ ; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_1024 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15, [[COPY1]], %subreg.sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27_sub28_sub29_sub30_sub31
+ ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = COPY [[REG_SEQUENCE]]
+ %0:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
+ %1:sgpr(<16 x s32>) = COPY $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
+ %2:sgpr(<32 x s32>) = G_CONCAT_VECTORS %0, %1
+ $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = COPY %2
+...
+
---
name: test_concat_vectors_s_v4s64_s_v2s64_s_v2s64
legalized: true
Index: llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -1772,27 +1772,42 @@
}
}
- assert(EltSize == 32 && "unhandled elt size");
+ if (EltSize == 32) {
+ static const int16_t Sub0_31_256[] = {
+ AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7,
+ AMDGPU::sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15,
+ AMDGPU::sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23,
+ AMDGPU::sub24_sub25_sub26_sub27_sub28_sub29_sub30_sub31
+ };
- static const int16_t Sub0_31_256[] = {
- AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7,
- AMDGPU::sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15,
- AMDGPU::sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23,
- AMDGPU::sub24_sub25_sub26_sub27_sub28_sub29_sub30_sub31
- };
+ static const int16_t Sub0_15_256[] = {
+ AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7,
+ AMDGPU::sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15
+ };
+
+ switch (AMDGPU::getRegBitWidth(*RC->MC)) {
+ case 256:
+ return {};
+ case 512:
+ return makeArrayRef(Sub0_15_256);
+ case 1024:
+ return makeArrayRef(Sub0_31_256);
+ default:
+ llvm_unreachable("unhandled register size");
+ }
+ }
- static const int16_t Sub0_15_256[] = {
- AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7,
- AMDGPU::sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15
+ assert(EltSize == 64 && "unhandled elt size");
+ static const int16_t Sub0_31_512[] = {
+ AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15,
+ AMDGPU::sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27_sub28_sub29_sub30_sub31
};
switch (AMDGPU::getRegBitWidth(*RC->MC)) {
- case 256:
- return {};
case 512:
- return makeArrayRef(Sub0_15_256);
+ return {};
case 1024:
- return makeArrayRef(Sub0_31_256);
+ return makeArrayRef(Sub0_31_512);
default:
llvm_unreachable("unhandled register size");
}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D74518.244308.patch
Type: text/x-patch
Size: 4460 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20200213/d7b4bd25/attachment-0001.bin>
More information about the llvm-commits
mailing list