[llvm] r351693 - AMDGPU/GlobalISel: Fix some crashes in g_unmerge_values/g_merge_values

Sun Jan 20 10:40:36 PST 2019

Author: arsenm
Date: Sun Jan 20 10:40:36 2019
New Revision: 351693

URL: http://llvm.org/viewvc/llvm-project?rev=351693&view=rev
Log:
AMDGPU/GlobalISel: Fix some crashes in g_unmerge_values/g_merge_values

This was crashing in the predicate function assuming the value
is a vector.

Copy more of what AArch64 uses. This probably needs more refinement
later, but I don't exactly understand what it means in some cases,
particularly since any legalization for these seems to be missing.

Added:
    llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values-xfail.mir
Modified:
    llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
    llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp?rev=351693&r1=351692&r2=351693&view=diff
==============================================================================

--- llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp Sun Jan 20 10:40:36 2019
@@ -35,6 +35,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo
   const LLT S16 = LLT::scalar(16);
   const LLT S32 = LLT::scalar(32);
   const LLT S64 = LLT::scalar(64);
+  const LLT S256 = LLT::scalar(256);
   const LLT S512 = LLT::scalar(512);
 
   const LLT V2S16 = LLT::vector(2, 16);
@@ -298,25 +299,85 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo
     unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
     unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
 
+    auto notValidElt = [=](const LegalityQuery &Query, unsigned TypeIdx) {
+      const LLT &Ty = Query.Types[TypeIdx];
+      if (Ty.isVector()) {
+        const LLT &EltTy = Ty.getElementType();
+        if (EltTy.getSizeInBits() < 8 || EltTy.getSizeInBits() > 64)
+          return true;
+        if (!isPowerOf2_32(EltTy.getSizeInBits()))
+          return true;
+      }
+      return false;
+    };
+
+    auto scalarize =
+      [=](const LegalityQuery &Query, unsigned TypeIdx) {
+      const LLT &Ty = Query.Types[TypeIdx];
+      return std::make_pair(TypeIdx, Ty.getElementType());
+    };
+
     getActionDefinitionsBuilder(Op)
+      // Break up vectors with weird elements into scalars
+      .fewerElementsIf(
+        [=](const LegalityQuery &Query) { return notValidElt(Query, 0); },
+        [=](const LegalityQuery &Query) { return scalarize(Query, 0); })
+      .fewerElementsIf(
+        [=](const LegalityQuery &Query) { return notValidElt(Query, 1); },
+        [=](const LegalityQuery &Query) { return scalarize(Query, 1); })
+      .clampScalar(BigTyIdx, S32, S512)
+      .widenScalarIf(
+        [=](const LegalityQuery &Query) {
+          const LLT &Ty = Query.Types[BigTyIdx];
+          return !isPowerOf2_32(Ty.getSizeInBits()) &&
+                 Ty.getSizeInBits() % 16 != 0;
+        },
+        [=](const LegalityQuery &Query) {
+          // Pick the next power of 2, or a multiple of 64 over 128.
+          // Whichever is smaller.
+          const LLT &Ty = Query.Types[BigTyIdx];
+          unsigned NewSizeInBits = 1 << Log2_32_Ceil(Ty.getSizeInBits() + 1);
+          if (NewSizeInBits >= 256) {
+            unsigned RoundedTo = alignTo<64>(Ty.getSizeInBits() + 1);
+            if (RoundedTo < NewSizeInBits)
+              NewSizeInBits = RoundedTo;
+          }
+          return std::make_pair(BigTyIdx, LLT::scalar(NewSizeInBits));
+        })
+      .widenScalarToNextPow2(LitTyIdx, /*Min*/ 16)
+      // Clamp the little scalar to s16-s256 and make it a power of 2. It's
+      // not worth considering the multiples of 64 since 2*192 and 2*384 are
+      // not valid.
+      .clampScalar(LitTyIdx, S16, S256)
+      .widenScalarToNextPow2(LitTyIdx, /*Min*/ 32)
       .legalIf([=](const LegalityQuery &Query) {
           const LLT &BigTy = Query.Types[BigTyIdx];
           const LLT &LitTy = Query.Types[LitTyIdx];
-          return BigTy.getSizeInBits() % 32 == 0 &&
-                 LitTy.getSizeInBits() % 32 == 0 &&
+
+          if (BigTy.isVector() && BigTy.getSizeInBits() < 32)
+            return false;
+          if (LitTy.isVector() && LitTy.getSizeInBits() < 32)
+            return false;
+
+          return BigTy.getSizeInBits() % 16 == 0 &&
+                 LitTy.getSizeInBits() % 16 == 0 &&
                  BigTy.getSizeInBits() <= 512;
         })
       // Any vectors left are the wrong size. Scalarize them.
-      .fewerElementsIf([](const LegalityQuery &Query) { return true; },
-                       [](const LegalityQuery &Query) {
-                         return std::make_pair(
-                           0, Query.Types[0].getElementType());
-                       })
-      .fewerElementsIf([](const LegalityQuery &Query) { return true; },
-                       [](const LegalityQuery &Query) {
-                         return std::make_pair(
-                           1, Query.Types[1].getElementType());
-                       });
+      .fewerElementsIf([](const LegalityQuery &Query) {
+          return Query.Types[0].isVector();
+        },
+        [](const LegalityQuery &Query) {
+          return std::make_pair(
+            0, Query.Types[0].getElementType());
+        })
+      .fewerElementsIf([](const LegalityQuery &Query) {
+          return Query.Types[1].isVector();
+        },
+        [](const LegalityQuery &Query) {
+          return std::make_pair(
+            1, Query.Types[1].getElementType());
+        });
 
   }
 

Added: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values-xfail.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values-xfail.mir?rev=351693&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values-xfail.mir (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values-xfail.mir Sun Jan 20 10:40:36 2019
@@ -0,0 +1,14 @@
+# RUN: not llc -mtriple=amdgcn-- -O0 -run-pass=legalizer -o - %s 2>&1 | FileCheck %s
+
+# CHECK: LLVM ERROR: unable to legalize instruction: %1:_(s1), %2:_(s1) = G_UNMERGE_VALUES %0:_(<2 x s1>) (in function: test_unmerge_v2s1)
+
+---
+name: test_unmerge_v2s1
+body: |
+  bb.0:
+    %0:_(<2 x s1>) = G_IMPLICIT_DEF
+    %1:_(s1), %2:_(s1) = G_UNMERGE_VALUES %0
+    S_NOP 0, implicit %1
+    S_NOP 0, implicit %2
+...
+

Modified: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir?rev=351693&r1=351692&r2=351693&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir Sun Jan 20 10:40:36 2019
@@ -2,17 +2,76 @@
 # RUN: llc -mtriple=amdgcn-- -O0 -run-pass=legalizer -o - %s | FileCheck %s
 
 ---
-name: test_unmerge_s64_s32
+name: test_unmerge_s32_s64
 body: |
   bb.0:
-    ; CHECK-LABEL: name: test_unmerge_s64_s32
+    ; CHECK-LABEL: name: test_unmerge_s32_s64
     ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
     ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
     ; CHECK: $vgpr0 = COPY [[UV]](s32)
-    ; CHECK: $vgpr2 = COPY [[UV1]](s32)
+    ; CHECK: $vgpr1 = COPY [[UV1]](s32)
     %0:_(s64) = G_CONSTANT i64 0
     %1:_(s32), %2:_(s32) = G_UNMERGE_VALUES %0:_(s64)
     $vgpr0 = COPY %1(s32)
-    $vgpr2 = COPY %2(s32)
+    $vgpr1 = COPY %2(s32)
+...
+
+---
+name: test_unmerge_s32_v2s32
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+    ; CHECK-LABEL: name: test_unmerge_s32_v2s32
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+    ; CHECK: $vgpr0 = COPY [[UV]](s32)
+    ; CHECK: $vgpr21 = COPY [[UV1]](s32)
+    %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
+    %1:_(s32), %2:_(s32) = G_UNMERGE_VALUES %0
+    $vgpr0 = COPY %1
+    $vgpr21= COPY %2
+...
+
+---
+name: test_unmerge_s16_v2s16
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; CHECK-LABEL: name: test_unmerge_s16_v2s16
+    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+    ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>)
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s16)
+    ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s16)
+    ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; CHECK: $vgpr1 = COPY [[ANYEXT1]](s32)
+    %0:_(<2 x s16>) = COPY $vgpr0
+    %1:_(s16), %2:_(s16) = G_UNMERGE_VALUES %0
+    %3:_(s32) = G_ANYEXT %1
+    %4:_(s32) = G_ANYEXT %2
+    $vgpr0 = COPY %3
+    $vgpr1 = COPY %4
+...
+
+---
+name: test_unmerge_s16_v3s16
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: test_unmerge_s16_v3s16
+    ; CHECK: [[DEF:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
+    ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[DEF]](<3 x s16>)
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s16)
+    ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s16)
+    ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s16)
+    ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
+    ; CHECK: $vgpr1 = COPY [[ANYEXT1]](s32)
+    ; CHECK: $vgpr2 = COPY [[ANYEXT2]](s32)
+    %0:_(<3 x s16>) = G_IMPLICIT_DEF
+    %1:_(s16), %2:_(s16),  %3:_(s16) = G_UNMERGE_VALUES %0
+    %4:_(s32) = G_ANYEXT %1
+    %5:_(s32) = G_ANYEXT %2
+    %6:_(s32) = G_ANYEXT %3
+    $vgpr0 = COPY %4
+    $vgpr1 = COPY %5
+    $vgpr2 = COPY %6
 ...