[llvm] 59a4ee9 - [AArch64][GlobalISel] Legalize oversize G_EXTRACT_VECTOR_ELT sources.
Amara Emerson via llvm-commits
llvm-commits at lists.llvm.org
Fri May 28 00:01:26 PDT 2021
Author: Amara Emerson
Date: 2021-05-27T23:52:24-07:00
New Revision: 59a4ee97288b1297bb98edd7f24fecd5e9c57170
URL: https://github.com/llvm/llvm-project/commit/59a4ee97288b1297bb98edd7f24fecd5e9c57170
DIFF: https://github.com/llvm/llvm-project/commit/59a4ee97288b1297bb98edd7f24fecd5e9c57170.diff
LOG: [AArch64][GlobalISel] Legalize oversize G_EXTRACT_VECTOR_ELT sources.
Also changes the fewerElements helper (fewerElementsVectorExtractInsertVectorElt)
to use getConstantVRegValWithLookThrough instead of m_ICst, since m_ICst
doesn't look through extends.
Differential Revision: https://reviews.llvm.org/D103227
Added:
Modified:
llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
llvm/test/CodeGen/AArch64/GlobalISel/legalize-extract-vector-elt.mir
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 8dca8bf79088..4240f7fe6223 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -3807,7 +3807,11 @@ LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
// If the index is a constant, we can really break this down as you would
// expect, and index into the target size pieces.
int64_t IdxVal;
- if (mi_match(Idx, MRI, m_ICst(IdxVal))) {
+ auto MaybeCst =
+ getConstantVRegValWithLookThrough(Idx, MRI, /*LookThroughInstrs*/ true,
+ /*HandleFConstants*/ false);
+ if (MaybeCst) {
+ IdxVal = MaybeCst->Value.getSExtValue();
// Avoid out of bounds indexing the pieces.
if (IdxVal >= VecTy.getNumElements()) {
MIRBuilder.buildUndef(DstReg);
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index b54140e9f292..f4058fcb5d3f 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -639,7 +639,10 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
return Query.Types[1].getNumElements() <= 16;
},
0, s8)
- .minScalarOrElt(0, s8); // Worst case, we need at least s8.
+ .minScalarOrElt(0, s8) // Worst case, we need at least s8.
+ .clampMaxNumElements(1, s64, 2)
+ .clampMaxNumElements(1, s32, 4)
+ .clampMaxNumElements(1, s16, 8);
getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
.legalIf(typeInSet(0, {v8s16, v2s32, v4s32, v2s64}));
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
index ecd156ff555b..fc5481c32043 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
@@ -116,7 +116,7 @@ define void @nonpow2_load_narrowing() {
; Currently can't handle vector lengths that aren't an exact multiple of
; natively supported vector lengths. Test that the fall-back works for those.
; FALLBACK-WITH-REPORT-ERR-G_IMPLICIT_DEF-LEGALIZABLE: (FIXME: this is what is expected once we can legalize non-pow-of-2 G_IMPLICIT_DEF) remark: <unknown>:0:0: unable to legalize instruction: %1:_(<7 x s64>) = G_ADD %0, %0 (in function: nonpow2_vector_add_fewerelements
-; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: %{{[0-9]+}}:_(s64) = G_EXTRACT_VECTOR_ELT %{{[0-9]+}}:_(<7 x s64>), %{{[0-9]+}}:_(s64) (in function: nonpow2_vector_add_fewerelements)
+; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: %47:_(<14 x s64>) = G_CONCAT_VECTORS %41:_(<2 x s64>), %42:_(<2 x s64>), %43:_(<2 x s64>), %44:_(<2 x s64>), %29:_(<2 x s64>), %29:_(<2 x s64>), %29:_(<2 x s64>) (in function: nonpow2_vector_add_fewerelements)
; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for nonpow2_vector_add_fewerelements
; FALLBACK-WITH-REPORT-OUT-LABEL: nonpow2_vector_add_fewerelements:
define void @nonpow2_vector_add_fewerelements() {
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-extract-vector-elt.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-extract-vector-elt.mir
index b1f218d22c2a..1c8c45b69119 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-extract-vector-elt.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-extract-vector-elt.mir
@@ -147,3 +147,101 @@ body: |
$x0 = COPY %3(p0)
RET_ReallyLR
...
+---
+name: test_eve_v4s64
+body: |
+ bb.0:
+ liveins: $q0, $q1, $x0
+ ; CHECK-LABEL: name: test_eve_v4s64
+ ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
+ ; CHECK: %idx:_(s32) = G_CONSTANT i32 1
+ ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; CHECK: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s64>), [[C]](s64)
+ ; CHECK: $x0 = COPY [[EVEC]](s64)
+ ; CHECK: RET_ReallyLR
+ %0:_(<2 x s64>) = COPY $q0
+ %1:_(<2 x s64>) = COPY $q1
+ %concat:_(<4 x s64>) = G_CONCAT_VECTORS %0(<2 x s64>), %1(<2 x s64>)
+ %idx:_(s32) = G_CONSTANT i32 1
+ %idxprom:_(s64) = G_SEXT %idx(s32)
+ %3:_(s64) = G_EXTRACT_VECTOR_ELT %concat:_(<4 x s64>), %idxprom:_(s64)
+ $x0 = COPY %3(s64)
+ RET_ReallyLR
+...
+---
+name: test_eve_v4s64_unknown_idx
+body: |
+ bb.0:
+ liveins: $q0, $q1, $x0
+ ; CHECK-LABEL: name: test_eve_v4s64_unknown_idx
+ ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
+ ; CHECK: %idx:_(s64) = COPY $x0
+ ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+ ; CHECK: G_STORE [[COPY]](<2 x s64>), [[FRAME_INDEX]](p0) :: (store 16 into %stack.0, align 32)
+ ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+ ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C]](s64)
+ ; CHECK: G_STORE [[COPY1]](<2 x s64>), [[PTR_ADD]](p0) :: (store 16 into %stack.0 + 16, basealign 32)
+ ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+ ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND %idx, [[C1]]
+ ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+ ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[AND]], [[C2]]
+ ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[MUL]](s64)
+ ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p0) :: (load 8)
+ ; CHECK: $x0 = COPY [[LOAD]](s64)
+ ; CHECK: RET_ReallyLR
+ %0:_(<2 x s64>) = COPY $q0
+ %1:_(<2 x s64>) = COPY $q1
+ %concat:_(<4 x s64>) = G_CONCAT_VECTORS %0(<2 x s64>), %1(<2 x s64>)
+ %idx:_(s64) = COPY $x0
+ %3:_(s64) = G_EXTRACT_VECTOR_ELT %concat:_(<4 x s64>), %idx:_(s64)
+ $x0 = COPY %3(s64)
+ RET_ReallyLR
+...
+---
+name: test_eve_v8s32
+body: |
+ bb.0:
+ liveins: $q0, $q1, $x0
+ ; CHECK-LABEL: name: test_eve_v8s32
+ ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+ ; CHECK: %idx:_(s32) = G_CONSTANT i32 1
+ ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C]](s64)
+ ; CHECK: $w0 = COPY [[EVEC]](s32)
+ ; CHECK: RET_ReallyLR
+ %0:_(<4 x s32>) = COPY $q0
+ %1:_(<4 x s32>) = COPY $q1
+ %concat:_(<8 x s32>) = G_CONCAT_VECTORS %0(<4 x s32>), %1(<4 x s32>)
+ %idx:_(s32) = G_CONSTANT i32 1
+ %idxprom:_(s64) = G_SEXT %idx(s32)
+ %3:_(s32) = G_EXTRACT_VECTOR_ELT %concat:_(<8 x s32>), %idxprom:_(s64)
+ $w0 = COPY %3(s32)
+ RET_ReallyLR
+...
+---
+name: test_eve_v16s16
+body: |
+ bb.0:
+ liveins: $q0, $q1, $x0
+ ; CHECK-LABEL: name: test_eve_v16s16
+ ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $q1
+ ; CHECK: %idx:_(s32) = G_CONSTANT i32 9
+ ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; CHECK: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[COPY1]](<8 x s16>), [[C]](s64)
+ ; CHECK: %ext:_(s32) = G_ANYEXT [[EVEC]](s16)
+ ; CHECK: $w0 = COPY %ext(s32)
+ ; CHECK: RET_ReallyLR
+ %0:_(<8 x s16>) = COPY $q0
+ %1:_(<8 x s16>) = COPY $q1
+ %concat:_(<16 x s16>) = G_CONCAT_VECTORS %0(<8 x s16>), %1(<8 x s16>)
+ %idx:_(s32) = G_CONSTANT i32 9
+ %idxprom:_(s64) = G_SEXT %idx(s32)
+ %3:_(s16) = G_EXTRACT_VECTOR_ELT %concat:_(<16 x s16>), %idxprom:_(s64)
+ %ext:_(s32) = G_ANYEXT %3
+ $w0 = COPY %ext(s32)
+ RET_ReallyLR
+...
More information about the llvm-commits mailing list