[llvm] r351693 - AMDGPU/GlobalISel: Fix some crashs in g_unmerge_values/g_merge_values
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Sun Jan 20 10:40:36 PST 2019
Author: arsenm
Date: Sun Jan 20 10:40:36 2019
New Revision: 351693
URL: http://llvm.org/viewvc/llvm-project?rev=351693&view=rev
Log:
AMDGPU/GlobalISel: Fix some crashs in g_unmerge_values/g_merge_values
This was crashing in the predicate function assuming the value
is a vector.
Copy more of what AArch64 uses. This probably needs more refinement
later, but I don't exactly understand what it means in some cases,
particularly since any legalization for these seems to be missing.
Added:
llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values-xfail.mir
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp?rev=351693&r1=351692&r2=351693&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp Sun Jan 20 10:40:36 2019
@@ -35,6 +35,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo
const LLT S16 = LLT::scalar(16);
const LLT S32 = LLT::scalar(32);
const LLT S64 = LLT::scalar(64);
+ const LLT S256 = LLT::scalar(256);
const LLT S512 = LLT::scalar(512);
const LLT V2S16 = LLT::vector(2, 16);
@@ -298,25 +299,85 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo
unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
+ auto notValidElt = [=](const LegalityQuery &Query, unsigned TypeIdx) {
+ const LLT &Ty = Query.Types[TypeIdx];
+ if (Ty.isVector()) {
+ const LLT &EltTy = Ty.getElementType();
+ if (EltTy.getSizeInBits() < 8 || EltTy.getSizeInBits() > 64)
+ return true;
+ if (!isPowerOf2_32(EltTy.getSizeInBits()))
+ return true;
+ }
+ return false;
+ };
+
+ auto scalarize =
+ [=](const LegalityQuery &Query, unsigned TypeIdx) {
+ const LLT &Ty = Query.Types[TypeIdx];
+ return std::make_pair(TypeIdx, Ty.getElementType());
+ };
+
getActionDefinitionsBuilder(Op)
+ // Break up vectors with weird elements into scalars
+ .fewerElementsIf(
+ [=](const LegalityQuery &Query) { return notValidElt(Query, 0); },
+ [=](const LegalityQuery &Query) { return scalarize(Query, 0); })
+ .fewerElementsIf(
+ [=](const LegalityQuery &Query) { return notValidElt(Query, 1); },
+ [=](const LegalityQuery &Query) { return scalarize(Query, 1); })
+ .clampScalar(BigTyIdx, S32, S512)
+ .widenScalarIf(
+ [=](const LegalityQuery &Query) {
+ const LLT &Ty = Query.Types[BigTyIdx];
+ return !isPowerOf2_32(Ty.getSizeInBits()) &&
+ Ty.getSizeInBits() % 16 != 0;
+ },
+ [=](const LegalityQuery &Query) {
+ // Pick the next power of 2, or a multiple of 64 over 128.
+ // Whichever is smaller.
+ const LLT &Ty = Query.Types[BigTyIdx];
+ unsigned NewSizeInBits = 1 << Log2_32_Ceil(Ty.getSizeInBits() + 1);
+ if (NewSizeInBits >= 256) {
+ unsigned RoundedTo = alignTo<64>(Ty.getSizeInBits() + 1);
+ if (RoundedTo < NewSizeInBits)
+ NewSizeInBits = RoundedTo;
+ }
+ return std::make_pair(BigTyIdx, LLT::scalar(NewSizeInBits));
+ })
+ .widenScalarToNextPow2(LitTyIdx, /*Min*/ 16)
+ // Clamp the little scalar to s8-s256 and make it a power of 2. It's not
+ // worth considering the multiples of 64 since 2*192 and 2*384 are not
+ // valid.
+ .clampScalar(LitTyIdx, S16, S256)
+ .widenScalarToNextPow2(LitTyIdx, /*Min*/ 32)
.legalIf([=](const LegalityQuery &Query) {
const LLT &BigTy = Query.Types[BigTyIdx];
const LLT &LitTy = Query.Types[LitTyIdx];
- return BigTy.getSizeInBits() % 32 == 0 &&
- LitTy.getSizeInBits() % 32 == 0 &&
+
+ if (BigTy.isVector() && BigTy.getSizeInBits() < 32)
+ return false;
+ if (LitTy.isVector() && LitTy.getSizeInBits() < 32)
+ return false;
+
+ return BigTy.getSizeInBits() % 16 == 0 &&
+ LitTy.getSizeInBits() % 16 == 0 &&
BigTy.getSizeInBits() <= 512;
})
// Any vectors left are the wrong size. Scalarize them.
- .fewerElementsIf([](const LegalityQuery &Query) { return true; },
- [](const LegalityQuery &Query) {
- return std::make_pair(
- 0, Query.Types[0].getElementType());
- })
- .fewerElementsIf([](const LegalityQuery &Query) { return true; },
- [](const LegalityQuery &Query) {
- return std::make_pair(
- 1, Query.Types[1].getElementType());
- });
+ .fewerElementsIf([](const LegalityQuery &Query) {
+ return Query.Types[0].isVector();
+ },
+ [](const LegalityQuery &Query) {
+ return std::make_pair(
+ 0, Query.Types[0].getElementType());
+ })
+ .fewerElementsIf([](const LegalityQuery &Query) {
+ return Query.Types[1].isVector();
+ },
+ [](const LegalityQuery &Query) {
+ return std::make_pair(
+ 1, Query.Types[1].getElementType());
+ });
}
Added: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values-xfail.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values-xfail.mir?rev=351693&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values-xfail.mir (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values-xfail.mir Sun Jan 20 10:40:36 2019
@@ -0,0 +1,14 @@
+# RUN: not llc -mtriple=amdgcn-- -O0 -run-pass=legalizer -o - %s 2>&1 | FileCheck %s
+
+# CHECK: LLVM ERROR: unable to legalize instruction: %1:_(s1), %2:_(s1) = G_UNMERGE_VALUES %0:_(<2 x s1>) (in function: test_unmerge_v2s1)
+
+---
+name: test_unmerge_v2s1
+body: |
+ bb.0:
+ %0:_(<2 x s1>) = G_IMPLICIT_DEF
+ %1:_(s1), %2:_(s1) = G_UNMERGE_VALUES %0
+ S_NOP 0, implicit %1
+ S_NOP 0, implicit %2
+...
+
Modified: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir?rev=351693&r1=351692&r2=351693&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir Sun Jan 20 10:40:36 2019
@@ -2,17 +2,76 @@
# RUN: llc -mtriple=amdgcn-- -O0 -run-pass=legalizer -o - %s | FileCheck %s
---
-name: test_unmerge_s64_s32
+name: test_unmerge_s32_s64
body: |
bb.0:
- ; CHECK-LABEL: name: test_unmerge_s64_s32
+ ; CHECK-LABEL: name: test_unmerge_s32_s64
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
; CHECK: $vgpr0 = COPY [[UV]](s32)
- ; CHECK: $vgpr2 = COPY [[UV1]](s32)
+ ; CHECK: $vgpr1 = COPY [[UV1]](s32)
%0:_(s64) = G_CONSTANT i64 0
%1:_(s32), %2:_(s32) = G_UNMERGE_VALUES %0:_(s64)
$vgpr0 = COPY %1(s32)
- $vgpr2 = COPY %2(s32)
+ $vgpr1 = COPY %2(s32)
+...
+
+---
+name: test_unmerge_s32_v2s32
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+ ; CHECK-LABEL: name: test_unmerge_s32_v2s32
+ ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
+ ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
+ ; CHECK: $vgpr0 = COPY [[UV]](s32)
+ ; CHECK: $vgpr21 = COPY [[UV1]](s32)
+ %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
+ %1:_(s32), %2:_(s32) = G_UNMERGE_VALUES %0
+ $vgpr0 = COPY %1
+ $vgpr21= COPY %2
+...
+
+---
+name: test_unmerge_s16_v2s16
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; CHECK-LABEL: name: test_unmerge_s16_v2s16
+ ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+ ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>)
+ ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s16)
+ ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s16)
+ ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
+ ; CHECK: $vgpr1 = COPY [[ANYEXT1]](s32)
+ %0:_(<2 x s16>) = COPY $vgpr0
+ %1:_(s16), %2:_(s16) = G_UNMERGE_VALUES %0
+ %3:_(s32) = G_ANYEXT %1
+ %4:_(s32) = G_ANYEXT %2
+ $vgpr0 = COPY %3
+ $vgpr1 = COPY %4
+...
+
+---
+name: test_unmerge_s16_v3s16
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: test_unmerge_s16_v3s16
+ ; CHECK: [[DEF:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
+ ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[DEF]](<3 x s16>)
+ ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s16)
+ ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s16)
+ ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s16)
+ ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
+ ; CHECK: $vgpr1 = COPY [[ANYEXT1]](s32)
+ ; CHECK: $vgpr2 = COPY [[ANYEXT2]](s32)
+ %0:_(<3 x s16>) = G_IMPLICIT_DEF
+ %1:_(s16), %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0
+ %4:_(s32) = G_ANYEXT %1
+ %5:_(s32) = G_ANYEXT %2
+ %6:_(s32) = G_ANYEXT %3
+ $vgpr0 = COPY %4
+ $vgpr1 = COPY %5
+ $vgpr2 = COPY %6
...
More information about the llvm-commits
mailing list