[llvm] [GlobalIsel][AArch64] more legal icmps (PR #78239)
Thorsten Schütt via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 16 00:55:07 PST 2024
https://github.com/tschuett created https://github.com/llvm/llvm-project/pull/78239
In https://github.com/llvm/llvm-project/pull/78181 the godbolt (https://llvm.godbolt.org/z/vMsnxMf1v) crashed with GlobalIsel.
LLVM ERROR: unable to legalize instruction: %90:_(<3 x s32>) = G_ICMP intpred(uge), %15:_(<3 x s32>), %0:_ (in function: vec3_i32)
>From e51001fca546ff3b5ddb40d643319a69d88a746f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= <schuett at gmail.com>
Date: Tue, 16 Jan 2024 09:52:37 +0100
Subject: [PATCH] [GlobalIsel][AArch64] more legal icmps
In https://github.com/llvm/llvm-project/pull/78181 the godbolt (https://llvm.godbolt.org/z/vMsnxMf1v) crashed with GlobalIsel.
LLVM ERROR: unable to legalize instruction: %90:_(<3 x s32>) = G_ICMP intpred(uge), %15:_(<3 x s32>), %0:_ (in function: vec3_i32)
---
.../CodeGen/GlobalISel/LegalizerHelper.cpp | 8 +++
.../AArch64/GISel/AArch64LegalizerInfo.cpp | 7 ++-
.../AArch64/GlobalISel/legalize-cmp.mir | 54 ++++++++++++++++---
3 files changed, 61 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 91d2497fdb7e208..22df0fd34c057bd 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -5314,6 +5314,14 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
Observer.changedInstr(MI);
return Legalized;
}
+ case TargetOpcode::G_ICMP: {
+ Observer.changingInstr(MI);
+ moreElementsVectorSrc(MI, MoreTy, 2);
+ moreElementsVectorSrc(MI, MoreTy, 3);
+ moreElementsVectorDst(MI, MoreTy, 0);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
default:
return UnableToLegalize;
}
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index e94f9d0c68ffe78..7357d666f1f80b8 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -524,7 +524,11 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.minScalarOrEltIf(
[=](const LegalityQuery &Query) { return Query.Types[1] == v2p0; }, 0,
s64)
- .clampNumElements(0, v2s32, v4s32);
+ .moreElementsToNextPow2(0)
+ .clampMaxNumElements(0, s64, 2)
+ .clampMaxNumElements(0, s32, 4)
+ .clampMaxNumElements(0, s16, 8)
+ .clampMaxNumElements(0, s8, 16);
getActionDefinitionsBuilder(G_FCMP)
// If we don't have full FP16 support, then scalarize the elements of
@@ -863,6 +867,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
},
0, s8)
.minScalarOrElt(0, s8) // Worst case, we need at least s8.
+ .moreElementsToNextPow2(1)
.clampMaxNumElements(1, s64, 2)
.clampMaxNumElements(1, s32, 4)
.clampMaxNumElements(1, s16, 8)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir
index e9b3aa0a3a8fd83..b590f8bfd6c1114 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir
@@ -56,7 +56,9 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors:
- ; CHECK: bb.2:
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
; CHECK-NEXT: RET_ReallyLR
bb.1:
%0:_(s128) = G_IMPLICIT_DEF
@@ -93,7 +95,9 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors:
- ; CHECK: bb.2:
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
; CHECK-NEXT: RET_ReallyLR
bb.1:
%lhs:_(s128) = G_IMPLICIT_DEF
@@ -132,7 +136,9 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors:
- ; CHECK: bb.2:
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
; CHECK-NEXT: RET_ReallyLR
bb.1:
%lhs:_(s88) = G_IMPLICIT_DEF
@@ -171,7 +177,9 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors:
- ; CHECK: bb.2:
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
; CHECK-NEXT: RET_ReallyLR
bb.1:
%lhs:_(s88) = G_IMPLICIT_DEF
@@ -210,7 +218,9 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors:
- ; CHECK: bb.2:
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
; CHECK-NEXT: RET_ReallyLR
bb.1:
%lhs:_(s96) = G_IMPLICIT_DEF
@@ -272,7 +282,9 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors:
- ; CHECK: bb.2:
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
; CHECK-NEXT: RET_ReallyLR
bb.1:
%lhs:_(s318) = G_IMPLICIT_DEF
@@ -318,7 +330,9 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors:
- ; CHECK: bb.2:
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
; CHECK-NEXT: RET_ReallyLR
bb.1:
%lhs:_(s158) = G_IMPLICIT_DEF
@@ -330,3 +344,29 @@ body: |
successors:
bb.3:
RET_ReallyLR
+...
+---
+name: test_3xs32_eq_pr_78181
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $x0
+ ; CHECK-LABEL: name: test_3xs32_eq_pr_78181
+ ; CHECK: liveins: $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %const:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR %const(s32), %const(s32), %const(s32), %const(s32)
+ ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR %const(s32), %const(s32), %const(s32), %const(s32)
+ ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<4 x s32>) = G_ICMP intpred(eq), [[BUILD_VECTOR]](<4 x s32>), [[BUILD_VECTOR1]]
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[ICMP]](<4 x s32>), [[C]](s64)
+ ; CHECK-NEXT: $w0 = COPY [[EVEC]](s32)
+ ; CHECK-NEXT: RET_ReallyLR
+ %const:_(s32) = G_IMPLICIT_DEF
+ %rhs:_(<3 x s32>) = G_BUILD_VECTOR %const(s32), %const(s32), %const(s32)
+ %lhs:_(<3 x s32>) = G_BUILD_VECTOR %const(s32), %const(s32), %const(s32)
+ %cmp:_(<3 x s32>) = G_ICMP intpred(eq), %lhs(<3 x s32>), %rhs
+ %1:_(s32) = G_CONSTANT i32 1
+ %2:_(s32) = G_EXTRACT_VECTOR_ELT %cmp(<3 x s32>), %1(s32)
+ $w0 = COPY %2(s32)
+ RET_ReallyLR
More information about the llvm-commits
mailing list