[llvm] 2126c70 - AMDGPU/GlobalISel: Don't mis-select vector index on a constant
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Sun Feb 9 15:38:16 PST 2020
Author: Matt Arsenault
Date: 2020-02-09T18:02:37-05:00
New Revision: 2126c70e3a628cb772d7b9f63cb897857214245a
URL: https://github.com/llvm/llvm-project/commit/2126c70e3a628cb772d7b9f63cb897857214245a
DIFF: https://github.com/llvm/llvm-project/commit/2126c70e3a628cb772d7b9f63cb897857214245a.diff
LOG: AMDGPU/GlobalISel: Don't mis-select vector index on a constant
Vector indexing with a constant index should be folded out in the
legalizer, but such a case was accidentally falling through to
instruction selection. Selection would then produce the indexing
operation with $noreg as the index register. Handle a constant index
the same way as a dynamic index, just in case a bug like this happens
again in the future.
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract-vector-elt.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert-vector-elt.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 862862366560..ff7cb9dc2c1a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -1721,6 +1721,12 @@ computeIndirectRegIndex(MachineRegisterInfo &MRI,
std::tie(IdxBaseReg, Offset, Unused)
= AMDGPU::getBaseWithConstantOffset(MRI, IdxReg);
+ if (IdxBaseReg == AMDGPU::NoRegister) {
+ // This will happen if the index is a known constant. This should ordinarily
+ // be legalized out, but handle it as a register just in case.
+ assert(Offset == 0);
+ IdxBaseReg = IdxReg;
+ }
ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(SuperRC, EltSize);
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract-vector-elt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract-vector-elt.mir
index bb6b6a711cec..bd349646e81a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract-vector-elt.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract-vector-elt.mir
@@ -784,3 +784,58 @@ body: |
%4:vgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %3
S_ENDPGM 0, implicit %4
...
+
+---
+name: extract_vector_elt_s_s32_v4s32_const_idx
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1_sgpr2_sgpr3
+
+ ; MOVREL-LABEL: name: extract_vector_elt_s_s32_v4s32_const_idx
+ ; MOVREL: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; MOVREL: $m0 = COPY [[S_MOV_B32_]]
+ ; MOVREL: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]]
+ ; MOVREL: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]]
+ ; GPRIDX-LABEL: name: extract_vector_elt_s_s32_v4s32_const_idx
+ ; GPRIDX: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GPRIDX: $m0 = COPY [[S_MOV_B32_]]
+ ; GPRIDX: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]]
+ ; GPRIDX: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]]
+ %0:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ %1:sgpr(s32) = G_CONSTANT i32 0
+ %2:sgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %1
+ S_ENDPGM 0, implicit %2
+...
+
+---
+name: extract_vector_elt_v_s32_v4s32_const_idx
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1_vgpr2_vgpr3
+
+ ; MOVREL-LABEL: name: extract_vector_elt_v_s32_v4s32_const_idx
+ ; MOVREL: [[COPY:%[0-9]+]]:vreg_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; MOVREL: $m0 = COPY [[S_MOV_B32_]]
+ ; MOVREL: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 undef [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]]
+ ; MOVREL: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]]
+ ; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v4s32_const_idx
+ ; GPRIDX: [[COPY:%[0-9]+]]:vreg_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GPRIDX: S_SET_GPR_IDX_ON [[S_MOV_B32_]], 1, implicit-def $m0, implicit $m0
+ ; GPRIDX: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 undef [[COPY]].sub0, implicit $exec, implicit [[COPY]], implicit $m0
+ ; GPRIDX: S_SET_GPR_IDX_OFF
+ ; GPRIDX: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]]
+ %0:vgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ %1:sgpr(s32) = G_CONSTANT i32 0
+ %2:vgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %1
+ S_ENDPGM 0, implicit %2
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert-vector-elt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert-vector-elt.mir
index 76af6f0062a8..ac3d623fb3d0 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert-vector-elt.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert-vector-elt.mir
@@ -624,3 +624,66 @@ body: |
%5:sgpr(<8 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %4
S_ENDPGM 0, implicit %5
...
+
+# This should have been folded out in the legalizer, but make sure it
+# doesn't crash.
+---
+name: insert_vector_elt_s_s32_v4s32_const_idx
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4
+
+ ; MOVREL-LABEL: name: insert_vector_elt_s_s32_v4s32_const_idx
+ ; MOVREL: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; MOVREL: $m0 = COPY [[S_MOV_B32_]]
+ ; MOVREL: [[S_INDIRECT_REG_WRITE_B32_V4_:%[0-9]+]]:sgpr_128 = S_INDIRECT_REG_WRITE_B32_V4 [[COPY]], [[COPY1]], 1, implicit $m0
+ ; MOVREL: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_B32_V4_]]
+ ; GPRIDX-LABEL: name: insert_vector_elt_s_s32_v4s32_const_idx
+ ; GPRIDX: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GPRIDX: $m0 = COPY [[S_MOV_B32_]]
+ ; GPRIDX: [[S_INDIRECT_REG_WRITE_B32_V4_:%[0-9]+]]:sgpr_128 = S_INDIRECT_REG_WRITE_B32_V4 [[COPY]], [[COPY1]], 1, implicit $m0
+ ; GPRIDX: S_ENDPGM 0, implicit [[S_INDIRECT_REG_WRITE_B32_V4_]]
+ %0:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ %1:sgpr(s32) = COPY $sgpr4
+ %2:sgpr(s32) = G_CONSTANT i32 0
+ %3:sgpr(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2
+ S_ENDPGM 0, implicit %3
+...
+
+---
+name: insert_vector_elt_v_s32_v4s32_const_idx
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4
+
+ ; MOVREL-LABEL: name: insert_vector_elt_v_s32_v4s32_const_idx
+ ; MOVREL: [[COPY:%[0-9]+]]:vreg_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; MOVREL: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; MOVREL: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; MOVREL: $m0 = COPY [[S_MOV_B32_]]
+ ; MOVREL: [[V_INDIRECT_REG_WRITE_B32_V4_:%[0-9]+]]:vreg_128 = V_INDIRECT_REG_WRITE_B32_V4 [[COPY]], [[COPY1]], 1, implicit $m0, implicit $exec
+ ; MOVREL: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_B32_V4_]]
+ ; GPRIDX-LABEL: name: insert_vector_elt_v_s32_v4s32_const_idx
+ ; GPRIDX: [[COPY:%[0-9]+]]:vreg_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GPRIDX: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GPRIDX: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; GPRIDX: S_SET_GPR_IDX_ON [[S_MOV_B32_]], 8, implicit-def $m0, implicit $m0
+ ; GPRIDX: [[V_INDIRECT_REG_WRITE_B32_V4_:%[0-9]+]]:vreg_128 = V_INDIRECT_REG_WRITE_B32_V4 [[COPY]], [[COPY1]], 1, implicit $m0, implicit $exec
+ ; GPRIDX: S_SET_GPR_IDX_OFF
+ ; GPRIDX: S_ENDPGM 0, implicit [[V_INDIRECT_REG_WRITE_B32_V4_]]
+ %0:vgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ %1:sgpr(s32) = COPY $sgpr4
+ %2:sgpr(s32) = G_CONSTANT i32 0
+ %3:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2
+ S_ENDPGM 0, implicit %3
+...
More information about the llvm-commits mailing list