[llvm] 73056f2 - [AArch64][GlobalISel] Simplify/nuke the merge/unmerge legalizer rules.

Amara Emerson via llvm-commits <llvm-commits at lists.llvm.org>
Wed Aug 11 16:45:30 PDT 2021


Author: Amara Emerson
Date: 2021-08-11T16:45:23-07:00
New Revision: 73056f239ef950f3a3e37769c99166306246e9cc

URL: https://github.com/llvm/llvm-project/commit/73056f239ef950f3a3e37769c99166306246e9cc
DIFF: https://github.com/llvm/llvm-project/commit/73056f239ef950f3a3e37769c99166306246e9cc.diff

LOG: [AArch64][GlobalISel] Simplify/nuke the merge/unmerge legalizer rules.

These rules were originally written when the new predicate-based legalizer
was introduced, in an attempt to preserve existing behaviour. They weren't
properly kept up to date as things like vector support were split out into
G_CONCAT_VECTORS, and frankly, even if they had been, they were too complex.

It's much easier to start from scratch with what we can actually support,
which is just a few type combinations. Anything illegal should either be
legalized or be eliminated as a side effect of artifact combination.
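
For illustration, here is a minimal standalone sketch of the check the new
rule reduces to once the widenScalarToNextPow2/clampScalar steps have run
(the helper name is hypothetical; the real logic is the legalIf predicate in
AArch64LegalizerInfo.cpp below):

  // Hypothetical helper mirroring the new legalIf predicate: after
  // widening/clamping, a scalar G_MERGE_VALUES/G_UNMERGE_VALUES is kept
  // only for these size combinations.
  static bool isLegalMergeUnmergePair(unsigned BigSize, unsigned LitSize) {
    // The big (merged) type must be s32, s64, or s128.
    if (BigSize != 32 && BigSize != 64 && BigSize != 128)
      return false;
    // The little (piece) type must be s8, s16, s32, or s64.
    return LitSize == 8 || LitSize == 16 || LitSize == 32 || LitSize == 64;
  }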

Differential Revision: https://reviews.llvm.org/D107937

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
    llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
    llvm/test/CodeGen/AArch64/GlobalISel/legalize-merge-values.mir
    llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index d6ba885d37104..e589c2ab024a6 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -45,7 +45,6 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
   const LLT s32 = LLT::scalar(32);
   const LLT s64 = LLT::scalar(64);
   const LLT s128 = LLT::scalar(128);
-  const LLT s256 = LLT::scalar(256);
   const LLT v16s8 = LLT::fixed_vector(16, 8);
   const LLT v8s8 = LLT::fixed_vector(8, 8);
   const LLT v4s8 = LLT::fixed_vector(4, 8);
@@ -534,76 +533,30 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
   for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
     unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
     unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
-
-    auto notValidElt = [](const LegalityQuery &Query, unsigned TypeIdx) {
-      const LLT &Ty = Query.Types[TypeIdx];
-      if (Ty.isVector()) {
-        const LLT &EltTy = Ty.getElementType();
-        if (EltTy.getSizeInBits() < 8 || EltTy.getSizeInBits() > 64)
-          return true;
-        if (!isPowerOf2_32(EltTy.getSizeInBits()))
-          return true;
-      }
-      return false;
-    };
-
-    // FIXME: This rule is horrible, but specifies the same as what we had
-    // before with the particularly strange definitions removed (e.g.
-    // s8 = G_MERGE_VALUES s32, s32).
-    // Part of the complexity comes from these ops being extremely flexible. For
-    // example, you can build/decompose vectors with it, concatenate vectors,
-    // etc. and in addition to this you can also bitcast with it at the same
-    // time. We've been considering breaking it up into multiple ops to make it
-    // more manageable throughout the backend.
     getActionDefinitionsBuilder(Op)
-        // Break up vectors with weird elements into scalars
-        .fewerElementsIf(
-            [=](const LegalityQuery &Query) { return notValidElt(Query, 0); },
-            scalarize(0))
-        .fewerElementsIf(
-            [=](const LegalityQuery &Query) { return notValidElt(Query, 1); },
-            scalarize(1))
-        // Clamp the big scalar to s8-s128 and make it a power of 2.
-        .clampScalar(BigTyIdx, s8, s128)
-        .widenScalarIf(
-            [=](const LegalityQuery &Query) {
-              const LLT &Ty = Query.Types[BigTyIdx];
-              return !isPowerOf2_32(Ty.getSizeInBits()) &&
-                     Ty.getSizeInBits() % 64 != 0 && Ty.isScalar();
-            },
-            [=](const LegalityQuery &Query) {
-              // Pick the next power of 2, or a multiple of 64 over 128.
-              // Whichever is smaller.
-              const LLT &Ty = Query.Types[BigTyIdx];
-              unsigned NewSizeInBits = 1
-                                       << Log2_32_Ceil(Ty.getSizeInBits() + 1);
-              if (NewSizeInBits >= 256) {
-                unsigned RoundedTo = alignTo<64>(Ty.getSizeInBits() + 1);
-                if (RoundedTo < NewSizeInBits)
-                  NewSizeInBits = RoundedTo;
-              }
-              return std::make_pair(BigTyIdx, LLT::scalar(NewSizeInBits));
-            })
-        // Clamp the little scalar to s8-s256 and make it a power of 2. It's not
-        // worth considering the multiples of 64 since 2*192 and 2*384 are not
-        // valid.
-        .clampScalar(LitTyIdx, s8, s256)
-        .widenScalarToNextPow2(LitTyIdx, /*Min*/ 8)
-        // So at this point, we have s8, s16, s32, s64, s128, s192, s256, s384,
-        // s512, <X x s8>, <X x s16>, <X x s32>, or <X x s64>.
-        // At this point it's simple enough to accept the legal types.
-        .legalIf([=](const LegalityQuery &Query) {
-          const LLT &BigTy = Query.Types[BigTyIdx];
-          const LLT &LitTy = Query.Types[LitTyIdx];
-          if (BigTy.isVector() && BigTy.getSizeInBits() < 32)
+        .widenScalarToNextPow2(LitTyIdx, 8)
+        .widenScalarToNextPow2(BigTyIdx, 32)
+        .clampScalar(LitTyIdx, s8, s64)
+        .clampScalar(BigTyIdx, s32, s128)
+        .legalIf([=](const LegalityQuery &Q) {
+          switch (Q.Types[BigTyIdx].getSizeInBits()) {
+          case 32:
+          case 64:
+          case 128:
+            break;
+          default:
             return false;
-          if (LitTy.isVector() && LitTy.getSizeInBits() < 32)
+          }
+          switch (Q.Types[LitTyIdx].getSizeInBits()) {
+          case 8:
+          case 16:
+          case 32:
+          case 64:
+            return true;
+          default:
             return false;
-          return BigTy.getSizeInBits() % LitTy.getSizeInBits() == 0;
-        })
-        // Any vectors left are the wrong size. Scalarize them.
-        .scalarize(0)
-        .scalarize(1);
+          }
+        });
   }
 
   getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)

diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
index 21cd6e4af6711..530fbdf932b23 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
@@ -51,19 +51,6 @@ end:
   br label %block
 }
 
-; Currently can't handle vector lengths that aren't an exact multiple of
-; natively supported vector lengths. Test that the fall-back works for those.
-; FALLBACK-WITH-REPORT-ERR-G_IMPLICIT_DEF-LEGALIZABLE: (FIXME: this is what is expected once we can legalize non-pow-of-2 G_IMPLICIT_DEF) remark: <unknown>:0:0: unable to legalize instruction: %1:_(<7 x s64>) = G_ADD %0, %0 (in function: nonpow2_vector_add_fewerelements
-; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: %47:_(<14 x s64>) = G_CONCAT_VECTORS %41:_(<2 x s64>), %42:_(<2 x s64>), %43:_(<2 x s64>), %44:_(<2 x s64>), %29:_(<2 x s64>), %29:_(<2 x s64>), %29:_(<2 x s64>) (in function: nonpow2_vector_add_fewerelements)
-; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for nonpow2_vector_add_fewerelements
-; FALLBACK-WITH-REPORT-OUT-LABEL: nonpow2_vector_add_fewerelements:
-define void @nonpow2_vector_add_fewerelements() {
-  %dummy = add <7 x i64> undef, undef
-  %ex = extractelement <7 x i64> %dummy, i64 0
-  store i64 %ex, i64* undef
-  ret void
-}
-
 ; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: cannot select: RET_ReallyLR implicit $x0 (in function: strict_align_feature)
 ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for strict_align_feature
 ; FALLBACK-WITH-REPORT-OUT-LABEL: strict_align_feature

diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-merge-values.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-merge-values.mir
index a802baca4c8d2..e203fa05f998a 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-merge-values.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-merge-values.mir
@@ -1,5 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc  -march=aarch64 -O0 -run-pass=legalizer  %s -o - | FileCheck %s
+# RUN: llc  -march=aarch64 -O0 -run-pass=legalizer -global-isel-abort=2 %s -o - | FileCheck %s
 
 ---
 name:            test_merge_s4
@@ -26,3 +26,26 @@ body: |
     %4:_(s64) = G_ANYEXT %3
     $x0 = COPY %4
 ...
+---
+name:            test_merge_s16_s8
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: test_merge_s16_s8
+    ; CHECK: %a:_(s32) = COPY $w0
+    ; CHECK: %b:_(s32) = COPY $w1
+    ; CHECK: %a_t:_(s8) = G_TRUNC %a(s32)
+    ; CHECK: %b_t:_(s8) = G_TRUNC %b(s32)
+    ; CHECK: %m:_(s16) = G_MERGE_VALUES %a_t(s8), %b_t(s8)
+    ; CHECK: %ext:_(s64) = G_ANYEXT %m(s16)
+    ; CHECK: $x0 = COPY %ext(s64)
+
+    ; This isn't legal but we don't support widening the destination type.
+    %a:_(s32) = COPY $w0
+    %b:_(s32) = COPY $w1
+    %a_t:_(s8) = G_TRUNC %a
+    %b_t:_(s8) = G_TRUNC %b
+
+    %m:_(s16) = G_MERGE_VALUES %a_t, %b_t
+    %ext:_(s64) = G_ANYEXT %m
+    $x0 = COPY %ext
+...

diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir
index d90ed320e4641..92a755b0fb5ea 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir
@@ -262,9 +262,9 @@ body:             |
     ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; CHECK: [[EVEC2:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR4]](<2 x s64>), [[C1]](s64)
     ; CHECK: [[EVEC3:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR2]](<2 x s64>), [[C3]](s64)
+    ; CHECK: [[SHUF:%[0-9]+]]:_(<2 x s64>) = G_SHUFFLE_VECTOR [[BUILD_VECTOR3]](<2 x s64>), [[BUILD_VECTOR5]], shufflemask(1, 3)
     ; CHECK: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[EVEC]](s64), [[EVEC1]](s64)
     ; CHECK: [[BUILD_VECTOR7:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[EVEC2]](s64), [[EVEC3]](s64)
-    ; CHECK: [[SHUF:%[0-9]+]]:_(<2 x s64>) = G_SHUFFLE_VECTOR [[BUILD_VECTOR3]](<2 x s64>), [[BUILD_VECTOR5]], shufflemask(1, 3)
     ; CHECK: G_STORE [[BUILD_VECTOR6]](<2 x s64>), [[COPY8]](p0) :: (store (<2 x s64>), align 64)
     ; CHECK: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
     ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY8]], [[C5]](s64)
