[llvm] 35950fe - [GlobalISel] support narrow G_IMPLICIT_DEF for DstSize % NarrowSize != 0

Dominik Montada via llvm-commits llvm-commits at lists.llvm.org
Wed Apr 8 02:00:15 PDT 2020


Author: Dominik Montada
Date: 2020-04-08T11:00:07+02:00
New Revision: 35950fea8d4f175d04aa27e5e0350e28bef36429

URL: https://github.com/llvm/llvm-project/commit/35950fea8d4f175d04aa27e5e0350e28bef36429
DIFF: https://github.com/llvm/llvm-project/commit/35950fea8d4f175d04aa27e5e0350e28bef36429.diff

LOG: [GlobalISel] support narrow G_IMPLICIT_DEF for DstSize % NarrowSize != 0

Summary:
When narrowing G_IMPLICIT_DEF where the original size is not a multiple
of the narrow size, emit a smaller G_IMPLICIT_DEF and use G_ANYEXT.

To prevent a potential endless loop in the legalizer, the condition
to combine G_ANYEXT(G_IMPLICIT_DEF) is changed from isInstUnsupported
to !isInstLegal, since in this case the combine is only valid if
consequent legalization of the newly combined G_IMPLICIT_DEF does not
introduce G_ANYEXT due to narrowing.

Although this legalization for G_IMPLICIT_DEF would also be valid for
the general case, it actually caused a lot of code regressions when
tried due to superfluous COPYs and combines not getting hit anymore.

Reviewers: dsanders, aemerson, volkan, arsenm, aditya_nandakumar

Reviewed By: arsenm

Subscribers: jvesely, nhaehnle, kerbowa, wdng, rovka, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D76598

Added: 
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def-s1025.mir

Modified: 
    llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
    llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir
    llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
index 41577a86b212..ae82e98f5326 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
@@ -269,7 +269,7 @@ class LegalizationArtifactCombiner {
 
       if (Opcode == TargetOpcode::G_ANYEXT) {
         // G_ANYEXT (G_IMPLICIT_DEF) -> G_IMPLICIT_DEF
-        if (isInstUnsupported({TargetOpcode::G_IMPLICIT_DEF, {DstTy}}))
+        if (!isInstLegal({TargetOpcode::G_IMPLICIT_DEF, {DstTy}}))
           return false;
         LLVM_DEBUG(dbgs() << ".. Combine G_ANYEXT(G_IMPLICIT_DEF): " << MI;);
         Builder.buildInstr(TargetOpcode::G_IMPLICIT_DEF, {DstReg}, {});

diff  --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 3c24933921b2..001c877d3e74 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -741,19 +741,34 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
   default:
     return UnableToLegalize;
   case TargetOpcode::G_IMPLICIT_DEF: {
-    // FIXME: add support for when SizeOp0 isn't an exact multiple of
-    // NarrowSize.
-    if (SizeOp0 % NarrowSize != 0)
-      return UnableToLegalize;
+    Register DstReg = MI.getOperand(0).getReg();
+    LLT DstTy = MRI.getType(DstReg);
+
+    // If SizeOp0 is not an exact multiple of NarrowSize, emit
+    // G_ANYEXT(G_IMPLICIT_DEF). Cast result to vector if needed.
+    // FIXME: Although this would also be legal for the general case, it causes
+    //  a lot of regressions in the emitted code (superfluous COPYs, artifact
+    //  combines not being hit). This seems to be a problem related to the
+    //  artifact combiner.
+    if (SizeOp0 % NarrowSize != 0) {
+      LLT ImplicitTy = NarrowTy;
+      if (DstTy.isVector())
+        ImplicitTy = LLT::vector(DstTy.getNumElements(), ImplicitTy);
+
+      Register ImplicitReg = MIRBuilder.buildUndef(ImplicitTy).getReg(0);
+      MIRBuilder.buildAnyExt(DstReg, ImplicitReg);
+
+      MI.eraseFromParent();
+      return Legalized;
+    }
+
     int NumParts = SizeOp0 / NarrowSize;
 
     SmallVector<Register, 2> DstRegs;
     for (int i = 0; i < NumParts; ++i)
-      DstRegs.push_back(
-          MIRBuilder.buildUndef(NarrowTy).getReg(0));
+      DstRegs.push_back(MIRBuilder.buildUndef(NarrowTy).getReg(0));
 
-    Register DstReg = MI.getOperand(0).getReg();
-    if(MRI.getType(DstReg).isVector())
+    if (DstTy.isVector())
       MIRBuilder.buildBuildVector(DstReg, DstRegs);
     else
       MIRBuilder.buildMerge(DstReg, DstRegs);

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def-s1025.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def-s1025.mir
new file mode 100644
index 000000000000..fea572d873d9
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def-s1025.mir
@@ -0,0 +1,17 @@
+# RUN: not --crash llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - 2>&1 | FileCheck %s
+# RUN: not --crash llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - 2>&1 | FileCheck %s
+
+# This test is not legalizable and produces more than 9000 instructions for it
+# before it fails, including legalizer artifacts like
+# %foo:_(s65600) = G_MERGE_VALUES. Since it is not really feasible to have that
+# many CHECK-lines, only the presence of the error is checked.
+
+---
+name: test_implicit_def_s1025
+body: |
+  bb.0:
+    ; CHECK: LLVM ERROR: unable to legalize instruction: %{{[0-9]+}}:_(s65600) = G_MERGE_VALUES
+    %0:_(s1025) = G_IMPLICIT_DEF
+    %1:_(s32) = G_TRUNC %0
+    $vgpr0 = COPY %1
+...

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir
index 0ff5bbcb5a6d..061fbd72e618 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir
@@ -174,29 +174,35 @@ body: |
     $vgpr0 = COPY %1
 ...
 
----
-name: test_implicit_def_s1025
-body: |
-  bb.0:
-
-    ; CHECK-LABEL: name: test_implicit_def_s1025
-    ; CHECK: [[DEF:%[0-9]+]]:_(s1025) = G_IMPLICIT_DEF
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[DEF]](s1025)
-    ; CHECK: $vgpr0 = COPY [[TRUNC]](s32)
-    %0:_(s1025) = G_IMPLICIT_DEF
-    %1:_(s32) = G_TRUNC %0
-    $vgpr0 = COPY %1
-...
-
 ---
 name: test_implicit_def_s1056
 body: |
   bb.0:
 
     ; CHECK-LABEL: name: test_implicit_def_s1056
-    ; CHECK: [[DEF:%[0-9]+]]:_(s1056) = G_IMPLICIT_DEF
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[DEF]](s1056)
-    ; CHECK: $vgpr0 = COPY [[TRUNC]](s32)
+    ; CHECK: [[DEF:%[0-9]+]]:_(s1024) = G_IMPLICIT_DEF
+    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](s1024)
+    ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32)
+    ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[UV3]](s32)
+    ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV4]](s32), [[UV5]](s32)
+    ; CHECK: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV6]](s32), [[UV7]](s32)
+    ; CHECK: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV8]](s32), [[UV9]](s32)
+    ; CHECK: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV10]](s32), [[UV11]](s32)
+    ; CHECK: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV12]](s32), [[UV13]](s32)
+    ; CHECK: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV14]](s32), [[UV15]](s32)
+    ; CHECK: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV16]](s32), [[UV17]](s32)
+    ; CHECK: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV18]](s32), [[UV19]](s32)
+    ; CHECK: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV20]](s32), [[UV21]](s32)
+    ; CHECK: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV22]](s32), [[UV23]](s32)
+    ; CHECK: [[MV12:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV24]](s32), [[UV25]](s32)
+    ; CHECK: [[MV13:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV26]](s32), [[UV27]](s32)
+    ; CHECK: [[MV14:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV28]](s32), [[UV29]](s32)
+    ; CHECK: [[MV15:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV30]](s32), [[UV31]](s32)
+    ; CHECK: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; CHECK: [[MV16:%[0-9]+]]:_(s2112) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64), [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64), [[MV12]](s64), [[MV13]](s64), [[MV14]](s64), [[MV15]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64)
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s1056) = G_TRUNC [[MV16]](s2112)
+    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[TRUNC]](s1056)
+    ; CHECK: $vgpr0 = COPY [[TRUNC1]](s32)
     %0:_(s1056) = G_IMPLICIT_DEF
     %1:_(s32) = G_TRUNC %0
     $vgpr0 = COPY %1

diff  --git a/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp b/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp
index ffdd108cd853..4c0834e8bfaf 100644
--- a/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp
+++ b/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp
@@ -2748,4 +2748,60 @@ TEST_F(AArch64GISelMITest, CreateLibcall) {
   EXPECT_TRUE(CheckMachineFunction(*MF, CheckStr)) << *MF;
 }
 
+// Test narrowing of G_IMPLICIT_DEF
+TEST_F(AArch64GISelMITest, NarrowImplicitDef) {
+  setUp();
+  if (!TM)
+    return;
+
+  DefineLegalizerInfo(A, {});
+
+  // Make sure that G_IMPLICIT_DEF can be narrowed if the original size is not a
+  // multiple of narrow size
+  LLT S32{LLT::scalar(32)};
+  LLT S48{LLT::scalar(48)};
+  LLT S64{LLT::scalar(64)};
+  LLT V2S64{{LLT::vector(2, 64)}};
+
+  auto Implicit1 = B.buildUndef(S64);
+  auto Implicit2 = B.buildUndef(S64);
+  auto Implicit3 = B.buildUndef(V2S64);
+  auto Implicit4 = B.buildUndef(V2S64);
+
+  AInfo Info(MF->getSubtarget());
+  DummyGISelObserver Observer;
+  LegalizerHelper Helper(*MF, Info, Observer, B);
+
+  // Perform Legalization
+  EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized,
+            Helper.narrowScalar(*Implicit1, 0, S48));
+  EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized,
+            Helper.narrowScalar(*Implicit2, 0, S32));
+  EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized,
+            Helper.narrowScalar(*Implicit3, 0, S48));
+  EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized,
+            Helper.narrowScalar(*Implicit4, 0, S32));
+
+  const auto *CheckStr = R"(
+  CHECK: [[DEF:%[0-9]+]]:_(s48) = G_IMPLICIT_DEF
+  CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[DEF]]
+
+  CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  CHECK: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[DEF]]:_(s32), [[DEF1]]
+
+  CHECK: [[DEF:%[0-9]+]]:_(<2 x s48>) = G_IMPLICIT_DEF
+  CHECK: [[ANYEXT:%[0-9]+]]:_(<2 x s64>) = G_ANYEXT [[DEF]]
+
+  CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  CHECK: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  CHECK: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  CHECK: [[DEF3:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+  CHECK: [[BV:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[DEF]]:_(s32), [[DEF1]]:_(s32), [[DEF2]]:_(s32), [[DEF3]]:_(s32)
+  )";
+
+  // Check
+  EXPECT_TRUE(CheckMachineFunction(*MF, CheckStr)) << *MF;
+}
+
 } // namespace


        


More information about the llvm-commits mailing list