[llvm] 35950fe - [GlobalISel] support narrow G_IMPLICIT_DEF for DstSize % NarrowSize != 0
Dominik Montada via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 8 02:00:15 PDT 2020
Author: Dominik Montada
Date: 2020-04-08T11:00:07+02:00
New Revision: 35950fea8d4f175d04aa27e5e0350e28bef36429
URL: https://github.com/llvm/llvm-project/commit/35950fea8d4f175d04aa27e5e0350e28bef36429
DIFF: https://github.com/llvm/llvm-project/commit/35950fea8d4f175d04aa27e5e0350e28bef36429.diff
LOG: [GlobalISel] support narrow G_IMPLICIT_DEF for DstSize % NarrowSize != 0
Summary:
When narrowing a G_IMPLICIT_DEF whose original size is not a multiple
of the narrow size, emit a G_IMPLICIT_DEF of the narrow type and extend
it to the original type with G_ANYEXT.
To prevent a potential endless loop in the legalizer, the condition
to combine G_ANYEXT(G_IMPLICIT_DEF) is changed from isInstUnsupported
to !isInstLegal, since in this case the combine is only valid if
subsequent legalization of the newly combined G_IMPLICIT_DEF does not
reintroduce a G_ANYEXT due to narrowing.
Although this legalization for G_IMPLICIT_DEF would also be valid for
the general case, trying it there caused many regressions in the emitted
code, due to superfluous COPYs and artifact combines no longer being hit.
Reviewers: dsanders, aemerson, volkan, arsenm, aditya_nandakumar
Reviewed By: arsenm
Subscribers: jvesely, nhaehnle, kerbowa, wdng, rovka, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D76598
Added:
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def-s1025.mir
Modified:
llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir
llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
index 41577a86b212..ae82e98f5326 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
@@ -269,7 +269,7 @@ class LegalizationArtifactCombiner {
if (Opcode == TargetOpcode::G_ANYEXT) {
// G_ANYEXT (G_IMPLICIT_DEF) -> G_IMPLICIT_DEF
- if (isInstUnsupported({TargetOpcode::G_IMPLICIT_DEF, {DstTy}}))
+ if (!isInstLegal({TargetOpcode::G_IMPLICIT_DEF, {DstTy}}))
return false;
LLVM_DEBUG(dbgs() << ".. Combine G_ANYEXT(G_IMPLICIT_DEF): " << MI;);
Builder.buildInstr(TargetOpcode::G_IMPLICIT_DEF, {DstReg}, {});
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 3c24933921b2..001c877d3e74 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -741,19 +741,34 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
default:
return UnableToLegalize;
case TargetOpcode::G_IMPLICIT_DEF: {
- // FIXME: add support for when SizeOp0 isn't an exact multiple of
- // NarrowSize.
- if (SizeOp0 % NarrowSize != 0)
- return UnableToLegalize;
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+
+ // If SizeOp0 is not an exact multiple of NarrowSize, emit
+ // G_ANYEXT(G_IMPLICIT_DEF). Cast result to vector if needed.
+ // FIXME: Although this would also be legal for the general case, it causes
+ // a lot of regressions in the emitted code (superfluous COPYs, artifact
+ // combines not being hit). This seems to be a problem related to the
+ // artifact combiner.
+ if (SizeOp0 % NarrowSize != 0) {
+ LLT ImplicitTy = NarrowTy;
+ if (DstTy.isVector())
+ ImplicitTy = LLT::vector(DstTy.getNumElements(), ImplicitTy);
+
+ Register ImplicitReg = MIRBuilder.buildUndef(ImplicitTy).getReg(0);
+ MIRBuilder.buildAnyExt(DstReg, ImplicitReg);
+
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
int NumParts = SizeOp0 / NarrowSize;
SmallVector<Register, 2> DstRegs;
for (int i = 0; i < NumParts; ++i)
- DstRegs.push_back(
- MIRBuilder.buildUndef(NarrowTy).getReg(0));
+ DstRegs.push_back(MIRBuilder.buildUndef(NarrowTy).getReg(0));
- Register DstReg = MI.getOperand(0).getReg();
- if(MRI.getType(DstReg).isVector())
+ if (DstTy.isVector())
MIRBuilder.buildBuildVector(DstReg, DstRegs);
else
MIRBuilder.buildMerge(DstReg, DstRegs);
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def-s1025.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def-s1025.mir
new file mode 100644
index 000000000000..fea572d873d9
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def-s1025.mir
@@ -0,0 +1,17 @@
+# RUN: not --crash llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - 2>&1 | FileCheck %s
+# RUN: not --crash llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - 2>&1 | FileCheck %s
+
+# This test is not legalizable and produces more than 9000 instructions for it
+# before it fails, including legalizer artifacts like
+# %foo:_(s65600) = G_MERGE_VALUES. Since it is not really feasible to have that
+# many CHECK-lines, only the presence of the error is checked.
+
+---
+name: test_implicit_def_s1025
+body: |
+ bb.0:
+ ; CHECK: LLVM ERROR: unable to legalize instruction: %{{[0-9]+}}:_(s65600) = G_MERGE_VALUES
+ %0:_(s1025) = G_IMPLICIT_DEF
+ %1:_(s32) = G_TRUNC %0
+ $vgpr0 = COPY %1
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir
index 0ff5bbcb5a6d..061fbd72e618 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir
@@ -174,29 +174,35 @@ body: |
$vgpr0 = COPY %1
...
----
-name: test_implicit_def_s1025
-body: |
- bb.0:
-
- ; CHECK-LABEL: name: test_implicit_def_s1025
- ; CHECK: [[DEF:%[0-9]+]]:_(s1025) = G_IMPLICIT_DEF
- ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[DEF]](s1025)
- ; CHECK: $vgpr0 = COPY [[TRUNC]](s32)
- %0:_(s1025) = G_IMPLICIT_DEF
- %1:_(s32) = G_TRUNC %0
- $vgpr0 = COPY %1
-...
-
---
name: test_implicit_def_s1056
body: |
bb.0:
; CHECK-LABEL: name: test_implicit_def_s1056
- ; CHECK: [[DEF:%[0-9]+]]:_(s1056) = G_IMPLICIT_DEF
- ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[DEF]](s1056)
- ; CHECK: $vgpr0 = COPY [[TRUNC]](s32)
+ ; CHECK: [[DEF:%[0-9]+]]:_(s1024) = G_IMPLICIT_DEF
+ ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](s1024)
+ ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32)
+ ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[UV3]](s32)
+ ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV4]](s32), [[UV5]](s32)
+ ; CHECK: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV6]](s32), [[UV7]](s32)
+ ; CHECK: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV8]](s32), [[UV9]](s32)
+ ; CHECK: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV10]](s32), [[UV11]](s32)
+ ; CHECK: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV12]](s32), [[UV13]](s32)
+ ; CHECK: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV14]](s32), [[UV15]](s32)
+ ; CHECK: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV16]](s32), [[UV17]](s32)
+ ; CHECK: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV18]](s32), [[UV19]](s32)
+ ; CHECK: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV20]](s32), [[UV21]](s32)
+ ; CHECK: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV22]](s32), [[UV23]](s32)
+ ; CHECK: [[MV12:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV24]](s32), [[UV25]](s32)
+ ; CHECK: [[MV13:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV26]](s32), [[UV27]](s32)
+ ; CHECK: [[MV14:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV28]](s32), [[UV29]](s32)
+ ; CHECK: [[MV15:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV30]](s32), [[UV31]](s32)
+ ; CHECK: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; CHECK: [[MV16:%[0-9]+]]:_(s2112) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64), [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64), [[MV12]](s64), [[MV13]](s64), [[MV14]](s64), [[MV15]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64)
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(s1056) = G_TRUNC [[MV16]](s2112)
+ ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[TRUNC]](s1056)
+ ; CHECK: $vgpr0 = COPY [[TRUNC1]](s32)
%0:_(s1056) = G_IMPLICIT_DEF
%1:_(s32) = G_TRUNC %0
$vgpr0 = COPY %1
diff --git a/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp b/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp
index ffdd108cd853..4c0834e8bfaf 100644
--- a/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp
+++ b/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp
@@ -2748,4 +2748,60 @@ TEST_F(AArch64GISelMITest, CreateLibcall) {
EXPECT_TRUE(CheckMachineFunction(*MF, CheckStr)) << *MF;
}
+// Test narrowing of G_IMPLICIT_DEF
+TEST_F(AArch64GISelMITest, NarrowImplicitDef) {
+ setUp();
+ if (!TM)
+ return;
+
+ DefineLegalizerInfo(A, {});
+
+ // Make sure that G_IMPLICIT_DEF can be narrowed if the original size is not a
+ // multiple of narrow size
+ LLT S32{LLT::scalar(32)};
+ LLT S48{LLT::scalar(48)};
+ LLT S64{LLT::scalar(64)};
+ LLT V2S64{{LLT::vector(2, 64)}};
+
+ auto Implicit1 = B.buildUndef(S64);
+ auto Implicit2 = B.buildUndef(S64);
+ auto Implicit3 = B.buildUndef(V2S64);
+ auto Implicit4 = B.buildUndef(V2S64);
+
+ AInfo Info(MF->getSubtarget());
+ DummyGISelObserver Observer;
+ LegalizerHelper Helper(*MF, Info, Observer, B);
+
+ // Perform Legalization
+ EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized,
+ Helper.narrowScalar(*Implicit1, 0, S48));
+ EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized,
+ Helper.narrowScalar(*Implicit2, 0, S32));
+ EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized,
+ Helper.narrowScalar(*Implicit3, 0, S48));
+ EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized,
+ Helper.narrowScalar(*Implicit4, 0, S32));
+
+ const auto *CheckStr = R"(
+ CHECK: [[DEF:%[0-9]+]]:_(s48) = G_IMPLICIT_DEF
+ CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[DEF]]
+
+ CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ CHECK: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[DEF]]:_(s32), [[DEF1]]
+
+ CHECK: [[DEF:%[0-9]+]]:_(<2 x s48>) = G_IMPLICIT_DEF
+ CHECK: [[ANYEXT:%[0-9]+]]:_(<2 x s64>) = G_ANYEXT [[DEF]]
+
+ CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ CHECK: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ CHECK: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ CHECK: [[DEF3:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ CHECK: [[BV:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[DEF]]:_(s32), [[DEF1]]:_(s32), [[DEF2]]:_(s32), [[DEF3]]:_(s32)
+ )";
+
+ // Check
+ EXPECT_TRUE(CheckMachineFunction(*MF, CheckStr)) << *MF;
+}
+
} // namespace
More information about the llvm-commits mailing list