[llvm] 59a4ee9 - [AArch64][GlobalISel] Legalize oversize G_EXTRACT_VECTOR_ELT sources.

Fri May 28 00:01:26 PDT 2021

Author: Amara Emerson
Date: 2021-05-27T23:52:24-07:00
New Revision: 59a4ee97288b1297bb98edd7f24fecd5e9c57170

URL: https://github.com/llvm/llvm-project/commit/59a4ee97288b1297bb98edd7f24fecd5e9c57170
DIFF: https://github.com/llvm/llvm-project/commit/59a4ee97288b1297bb98edd7f24fecd5e9c57170.diff

LOG: [AArch64][GlobalISel] Legalize oversize G_EXTRACT_VECTOR_ELT sources.

Also changes the fewerElements helper to use the lookthrough constant helper
instead of m_ICst, since m_ICst doesn't look through extends.

Differential Revision: https://reviews.llvm.org/D103227

Added: 
    

Modified: 
    llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
    llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
    llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
    llvm/test/CodeGen/AArch64/GlobalISel/legalize-extract-vector-elt.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 8dca8bf79088..4240f7fe6223 100644

--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -3807,7 +3807,11 @@ LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
   // If the index is a constant, we can really break this down as you would
   // expect, and index into the target size pieces.
   int64_t IdxVal;
-  if (mi_match(Idx, MRI, m_ICst(IdxVal))) {
+  auto MaybeCst =
+      getConstantVRegValWithLookThrough(Idx, MRI, /*LookThroughInstrs*/ true,
+                                        /*HandleFConstants*/ false);
+  if (MaybeCst) {
+    IdxVal = MaybeCst->Value.getSExtValue();
     // Avoid out of bounds indexing the pieces.
     if (IdxVal >= VecTy.getNumElements()) {
       MIRBuilder.buildUndef(DstReg);

diff  --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index b54140e9f292..f4058fcb5d3f 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -639,7 +639,10 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
             return Query.Types[1].getNumElements() <= 16;
           },
           0, s8)
-      .minScalarOrElt(0, s8); // Worst case, we need at least s8.
+      .minScalarOrElt(0, s8) // Worst case, we need at least s8.
+      .clampMaxNumElements(1, s64, 2)
+      .clampMaxNumElements(1, s32, 4)
+      .clampMaxNumElements(1, s16, 8);
 
   getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
       .legalIf(typeInSet(0, {v8s16, v2s32, v4s32, v2s64}));

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
index ecd156ff555b..fc5481c32043 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
@@ -116,7 +116,7 @@ define void @nonpow2_load_narrowing() {
 ; Currently can't handle vector lengths that aren't an exact multiple of
 ; natively supported vector lengths. Test that the fall-back works for those.
 ; FALLBACK-WITH-REPORT-ERR-G_IMPLICIT_DEF-LEGALIZABLE: (FIXME: this is what is expected once we can legalize non-pow-of-2 G_IMPLICIT_DEF) remark: <unknown>:0:0: unable to legalize instruction: %1:_(<7 x s64>) = G_ADD %0, %0 (in function: nonpow2_vector_add_fewerelements
-; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: %{{[0-9]+}}:_(s64) = G_EXTRACT_VECTOR_ELT %{{[0-9]+}}:_(<7 x s64>), %{{[0-9]+}}:_(s64) (in function: nonpow2_vector_add_fewerelements)
+; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: %47:_(<14 x s64>) = G_CONCAT_VECTORS %41:_(<2 x s64>), %42:_(<2 x s64>), %43:_(<2 x s64>), %44:_(<2 x s64>), %29:_(<2 x s64>), %29:_(<2 x s64>), %29:_(<2 x s64>) (in function: nonpow2_vector_add_fewerelements)
 ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for nonpow2_vector_add_fewerelements
 ; FALLBACK-WITH-REPORT-OUT-LABEL: nonpow2_vector_add_fewerelements:
 define void @nonpow2_vector_add_fewerelements() {

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-extract-vector-elt.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-extract-vector-elt.mir
index b1f218d22c2a..1c8c45b69119 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-extract-vector-elt.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-extract-vector-elt.mir
@@ -147,3 +147,101 @@ body: |
     $x0 = COPY %3(p0)
     RET_ReallyLR
 ...
+---
+name:            test_eve_v4s64
+body: |
+  bb.0:
+    liveins: $q0, $q1, $x0
+    ; CHECK-LABEL: name: test_eve_v4s64
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
+    ; CHECK: %idx:_(s32) = G_CONSTANT i32 1
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s64>), [[C]](s64)
+    ; CHECK: $x0 = COPY [[EVEC]](s64)
+    ; CHECK: RET_ReallyLR
+    %0:_(<2 x s64>) = COPY $q0
+    %1:_(<2 x s64>) = COPY $q1
+    %concat:_(<4 x s64>) = G_CONCAT_VECTORS %0(<2 x s64>), %1(<2 x s64>)
+    %idx:_(s32) = G_CONSTANT i32 1
+    %idxprom:_(s64) = G_SEXT %idx(s32)
+    %3:_(s64) = G_EXTRACT_VECTOR_ELT %concat:_(<4 x s64>), %idxprom:_(s64)
+    $x0 = COPY %3(s64)
+    RET_ReallyLR
+...
+---
+name:            test_eve_v4s64_unknown_idx
+body: |
+  bb.0:
+    liveins: $q0, $q1, $x0
+    ; CHECK-LABEL: name: test_eve_v4s64_unknown_idx
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
+    ; CHECK: %idx:_(s64) = COPY $x0
+    ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+    ; CHECK: G_STORE [[COPY]](<2 x s64>), [[FRAME_INDEX]](p0) :: (store 16 into %stack.0, align 32)
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C]](s64)
+    ; CHECK: G_STORE [[COPY1]](<2 x s64>), [[PTR_ADD]](p0) :: (store 16 into %stack.0 + 16, basealign 32)
+    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND %idx, [[C1]]
+    ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+    ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[AND]], [[C2]]
+    ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[MUL]](s64)
+    ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p0) :: (load 8)
+    ; CHECK: $x0 = COPY [[LOAD]](s64)
+    ; CHECK: RET_ReallyLR
+    %0:_(<2 x s64>) = COPY $q0
+    %1:_(<2 x s64>) = COPY $q1
+    %concat:_(<4 x s64>) = G_CONCAT_VECTORS %0(<2 x s64>), %1(<2 x s64>)
+    %idx:_(s64) = COPY $x0
+    %3:_(s64) = G_EXTRACT_VECTOR_ELT %concat:_(<4 x s64>), %idx:_(s64)
+    $x0 = COPY %3(s64)
+    RET_ReallyLR
+...
+---
+name:            test_eve_v8s32
+body: |
+  bb.0:
+    liveins: $q0, $q1, $x0
+    ; CHECK-LABEL: name: test_eve_v8s32
+    ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+    ; CHECK: %idx:_(s32) = G_CONSTANT i32 1
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C]](s64)
+    ; CHECK: $w0 = COPY [[EVEC]](s32)
+    ; CHECK: RET_ReallyLR
+    %0:_(<4 x s32>) = COPY $q0
+    %1:_(<4 x s32>) = COPY $q1
+    %concat:_(<8 x s32>) = G_CONCAT_VECTORS %0(<4 x s32>), %1(<4 x s32>)
+    %idx:_(s32) = G_CONSTANT i32 1
+    %idxprom:_(s64) = G_SEXT %idx(s32)
+    %3:_(s32) = G_EXTRACT_VECTOR_ELT %concat:_(<8 x s32>), %idxprom:_(s64)
+    $w0 = COPY %3(s32)
+    RET_ReallyLR
+...
+---
+name:            test_eve_v16s16
+body: |
+  bb.0:
+    liveins: $q0, $q1, $x0
+    ; CHECK-LABEL: name: test_eve_v16s16
+    ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $q1
+    ; CHECK: %idx:_(s32) = G_CONSTANT i32 9
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[COPY1]](<8 x s16>), [[C]](s64)
+    ; CHECK: %ext:_(s32) = G_ANYEXT [[EVEC]](s16)
+    ; CHECK: $w0 = COPY %ext(s32)
+    ; CHECK: RET_ReallyLR
+    %0:_(<8 x s16>) = COPY $q0
+    %1:_(<8 x s16>) = COPY $q1
+    %concat:_(<16 x s16>) = G_CONCAT_VECTORS %0(<8 x s16>), %1(<8 x s16>)
+    %idx:_(s32) = G_CONSTANT i32 9
+    %idxprom:_(s64) = G_SEXT %idx(s32)
+    %3:_(s16) = G_EXTRACT_VECTOR_ELT %concat:_(<16 x s16>), %idxprom:_(s64)
+    %ext:_(s32) = G_ANYEXT %3
+    $w0 = COPY %ext(s32)
+    RET_ReallyLR
+...