[llvm] f4a38c1 - AMDGPU/GlobalISel: Look through casts when legalizing vector indexing
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Sun Feb 9 15:38:15 PST 2020
Author: Matt Arsenault
Date: 2020-02-09T18:02:10-05:00
New Revision: f4a38c114e124c21549d06281554658687012179
URL: https://github.com/llvm/llvm-project/commit/f4a38c114e124c21549d06281554658687012179
DIFF: https://github.com/llvm/llvm-project/commit/f4a38c114e124c21549d06281554658687012179.diff
LOG: AMDGPU/GlobalISel: Look through casts when legalizing vector indexing
We were failing to find constants that were cast. I feel like the
artifact combiner should have folded the constant in the trunc before
the custom lowering, but that doesn't happen.
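[Editor's note: the sketch below is illustrative, not part of the commit. The
patch switches from getConstantVRegVal, which only matches a G_CONSTANT
defining the index register directly, to getConstantVRegValWithLookThrough
from llvm/CodeGen/GlobalISel/Utils.h. The hypothetical helper
lookThroughCastsForConstant here mirrors the look-through idea: walk past
cast artifacts until the underlying G_CONSTANT is found, re-applying each
cast to the value.]

    #include "llvm/ADT/Optional.h"
    #include "llvm/ADT/SmallVector.h"
    #include "llvm/CodeGen/MachineRegisterInfo.h"
    #include "llvm/CodeGen/TargetOpcodes.h"

    using namespace llvm;

    // Hypothetical re-implementation of the look-through idea, for
    // illustration only; the patch calls the in-tree
    // getConstantVRegValWithLookThrough() instead.
    static Optional<int64_t>
    lookThroughCastsForConstant(Register VReg, const MachineRegisterInfo &MRI) {
      // Record each cast we walk through: (opcode, result width in bits).
      SmallVector<std::pair<unsigned, unsigned>, 4> Casts;
      MachineInstr *MI = MRI.getVRegDef(VReg);
      while (MI && MI->getOpcode() != TargetOpcode::G_CONSTANT) {
        switch (MI->getOpcode()) {
        case TargetOpcode::G_TRUNC:
        case TargetOpcode::G_SEXT:
        case TargetOpcode::G_ZEXT:
        case TargetOpcode::G_ANYEXT:
          Casts.emplace_back(
              MI->getOpcode(),
              MRI.getType(MI->getOperand(0).getReg()).getSizeInBits());
          MI = MRI.getVRegDef(MI->getOperand(1).getReg());
          break;
        default:
          return None; // Defined by something else: a dynamic index.
        }
      }
      if (!MI)
        return None;
      APInt Val = MI->getOperand(1).getCImm()->getValue();
      // Re-apply the casts innermost-first so the result matches the value
      // the index register actually holds.
      for (auto It = Casts.rbegin(); It != Casts.rend(); ++It) {
        unsigned Width = It->second;
        switch (It->first) {
        case TargetOpcode::G_TRUNC:  Val = Val.trunc(Width); break;
        case TargetOpcode::G_SEXT:   Val = Val.sext(Width);  break;
        case TargetOpcode::G_ZEXT:
        case TargetOpcode::G_ANYEXT: Val = Val.zext(Width);  break;
        }
      }
      return Val.getSExtValue();
    }

With this in hand, the G_TRUNC of the i64 0 in the new tests below resolves
to a constant index of 0, so the legalizer can emit a G_EXTRACT/G_INSERT
instead of falling back to dynamic register indexing.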
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 49b2ce9c60ab..13a084ca14c3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -1668,8 +1668,12 @@ bool AMDGPULegalizerInfo::legalizeExtractVectorElt(
// TODO: Should move some of this into LegalizerHelper.
// TODO: Promote dynamic indexing of s16 to s32
- // TODO: Dynamic s64 indexing is only legal for SGPR.
- Optional<int64_t> IdxVal = getConstantVRegVal(MI.getOperand(2).getReg(), MRI);
+
+ // FIXME: Artifact combiner probably should have replaced the truncated
+ // constant before this, so we shouldn't need
+ // getConstantVRegValWithLookThrough.
+ Optional<ValueAndVReg> IdxVal = getConstantVRegValWithLookThrough(
+ MI.getOperand(2).getReg(), MRI);
if (!IdxVal) // Dynamic case will be selected to register indexing.
return true;
@@ -1682,8 +1686,8 @@ bool AMDGPULegalizerInfo::legalizeExtractVectorElt(
B.setInstr(MI);
- if (IdxVal.getValue() < VecTy.getNumElements())
- B.buildExtract(Dst, Vec, IdxVal.getValue() * EltTy.getSizeInBits());
+ if (IdxVal->Value < VecTy.getNumElements())
+ B.buildExtract(Dst, Vec, IdxVal->Value * EltTy.getSizeInBits());
else
B.buildUndef(Dst);
@@ -1697,8 +1701,12 @@ bool AMDGPULegalizerInfo::legalizeInsertVectorElt(
// TODO: Should move some of this into LegalizerHelper.
// TODO: Promote dynamic indexing of s16 to s32
- // TODO: Dynamic s64 indexing is only legal for SGPR.
- Optional<int64_t> IdxVal = getConstantVRegVal(MI.getOperand(3).getReg(), MRI);
+
+ // FIXME: Artifact combiner probably should have replaced the truncated
+ // constant before this, so we shouldn't need
+ // getConstantVRegValWithLookThrough.
+ Optional<ValueAndVReg> IdxVal = getConstantVRegValWithLookThrough(
+ MI.getOperand(3).getReg(), MRI);
if (!IdxVal) // Dynamic case will be selected to register indexing.
return true;
@@ -1712,8 +1720,8 @@ bool AMDGPULegalizerInfo::legalizeInsertVectorElt(
B.setInstr(MI);
- if (IdxVal.getValue() < VecTy.getNumElements())
- B.buildInsert(Dst, Vec, Ins, IdxVal.getValue() * EltTy.getSizeInBits());
+ if (IdxVal->Value < VecTy.getNumElements())
+ B.buildInsert(Dst, Vec, Ins, IdxVal->Value * EltTy.getSizeInBits());
else
B.buildUndef(Dst);
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir
index 06d3319f6b75..eba8a2393e77 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir
@@ -275,9 +275,8 @@ body: |
; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV]], 1
; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV1]], 1
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32)
- ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[C]](s1)
- ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<2 x s32>), [[SEXT]](s32)
- ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[EVEC]](s32)
+ ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[BUILD_VECTOR]](<2 x s32>), 0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[EXTRACT]](s32)
; CHECK: $vgpr0 = COPY [[COPY1]](s32)
%0:_(<2 x s1>) = G_IMPLICIT_DEF
%1:_(s1) = G_CONSTANT i1 false
@@ -676,3 +675,22 @@ body: |
%2:_(s64) = G_EXTRACT_VECTOR_ELT %0, %1
$vgpr0_vgpr1 = COPY %2
...
+
+# Make sure we look through casts looking for a constant index.
+---
+name: extract_vector_elt_look_through_trunc_0_v4i32
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1_vgpr2_vgpr3
+ ; CHECK-LABEL: name: extract_vector_elt_look_through_trunc_0_v4i32
+ ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+ ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<4 x s32>), 0
+ ; CHECK: $vgpr0 = COPY [[EXTRACT]](s32)
+ %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+ %1:_(s64) = G_CONSTANT i64 0
+ %2:_(s32) = G_TRUNC %1
+ %3:_(s32) = G_EXTRACT_VECTOR_ELT %0, %2
+ $vgpr0 = COPY %3
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir
index 760c2ae6f136..bb18c32909f5 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir
@@ -127,10 +127,8 @@ body: |
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 8
- ; CHECK: [[IVEC:%[0-9]+]]:_(<2 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s32), [[SEXT_INREG]](s32)
- ; CHECK: $vgpr0_vgpr1 = COPY [[IVEC]](<2 x s32>)
+ ; CHECK: [[INSERT:%[0-9]+]]:_(<2 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 0
+ ; CHECK: $vgpr0_vgpr1 = COPY [[INSERT]](<2 x s32>)
%0:_(<2 x s32>) = COPY $vgpr0_vgpr1
%1:_(s32) = COPY $vgpr2
%2:_(s8) = G_CONSTANT i8 0
@@ -161,3 +159,24 @@ body: |
%5:_(<2 x s32>) = G_ANYEXT %4
$vgpr0_vgpr1 = COPY %5
...
+
+---
+name: insert_vector_elt_v4s32_s32_look_through_trunc_0
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
+
+ ; CHECK-LABEL: name: insert_vector_elt_v4s32_s32_look_through_trunc_0
+ ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 0
+ ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>)
+ %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+ %1:_(s32) = COPY $vgpr4
+ %2:_(s64) = G_CONSTANT i64 0
+ %3:_(s32) = G_TRUNC %2
+ %4:_(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %3
+ $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %4
+...
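[Editor's note: the FIXME added in both hunks above says the artifact
combiner "probably should have replaced the truncated constant before
this." A hedged sketch of what such a fold might look like follows; it is
hypothetical and was not part of the in-tree combiner at the time, though it
uses real MachineIRBuilder/MachineRegisterInfo APIs.]

    #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
    #include "llvm/CodeGen/MachineRegisterInfo.h"
    #include "llvm/CodeGen/TargetOpcodes.h"

    using namespace llvm;

    // Hypothetical fold: rewrite G_TRUNC(G_CONSTANT) into a narrower
    // G_CONSTANT so later lowering sees the constant directly and the
    // look-through in the legalizer would be unnecessary.
    static bool foldTruncOfConstant(MachineInstr &Trunc,
                                    MachineRegisterInfo &MRI,
                                    MachineIRBuilder &B) {
      assert(Trunc.getOpcode() == TargetOpcode::G_TRUNC);
      MachineInstr *Def = MRI.getVRegDef(Trunc.getOperand(1).getReg());
      if (!Def || Def->getOpcode() != TargetOpcode::G_CONSTANT)
        return false;

      Register Dst = Trunc.getOperand(0).getReg();
      LLT DstTy = MRI.getType(Dst);
      // Truncate the constant's value to the destination width.
      APInt Val = Def->getOperand(1).getCImm()->getValue().trunc(
          DstTy.getSizeInBits());
      B.setInstr(Trunc);
      B.buildConstant(Dst, Val);
      Trunc.eraseFromParent();
      return true;
    }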