[llvm] [GlobalIsel][AArch64] more legal icmps (PR #78239)

Thorsten Schütt via llvm-commits llvm-commits at lists.llvm.org
Tue Jan 16 11:28:06 PST 2024


https://github.com/tschuett updated https://github.com/llvm/llvm-project/pull/78239

>From e51001fca546ff3b5ddb40d643319a69d88a746f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= <schuett at gmail.com>
Date: Tue, 16 Jan 2024 09:52:37 +0100
Subject: [PATCH 1/5] [GlobalIsel][AArch64] more legal icmps

In https://github.com/llvm/llvm-project/pull/78181, the Godbolt example (https://llvm.godbolt.org/z/vMsnxMf1v) crashed when compiled with GlobalISel:

LLVM ERROR: unable to legalize instruction: %90:_(<3 x s32>) = G_ICMP intpred(uge), %15:_(<3 x s32>), %0:_ (in function: vec3_i32)
---
 .../CodeGen/GlobalISel/LegalizerHelper.cpp    |  8 +++
 .../AArch64/GISel/AArch64LegalizerInfo.cpp    |  7 ++-
 .../AArch64/GlobalISel/legalize-cmp.mir       | 54 ++++++++++++++++---
 3 files changed, 61 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 91d2497fdb7e20..22df0fd34c057b 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -5314,6 +5314,14 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
     Observer.changedInstr(MI);
     return Legalized;
   }
+  case TargetOpcode::G_ICMP: {
+    Observer.changingInstr(MI);
+    moreElementsVectorSrc(MI, MoreTy, 2);
+    moreElementsVectorSrc(MI, MoreTy, 3);
+    moreElementsVectorDst(MI, MoreTy, 0);
+    Observer.changedInstr(MI);
+    return Legalized;
+  }
   default:
     return UnableToLegalize;
   }
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index e94f9d0c68ffe7..7357d666f1f80b 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -524,7 +524,11 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
       .minScalarOrEltIf(
           [=](const LegalityQuery &Query) { return Query.Types[1] == v2p0; }, 0,
           s64)
-      .clampNumElements(0, v2s32, v4s32);
+      .moreElementsToNextPow2(0)
+      .clampMaxNumElements(0, s64, 2)
+      .clampMaxNumElements(0, s32, 4)
+      .clampMaxNumElements(0, s16, 8)
+      .clampMaxNumElements(0, s8, 16);
 
   getActionDefinitionsBuilder(G_FCMP)
       // If we don't have full FP16 support, then scalarize the elements of
@@ -863,6 +867,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
           },
           0, s8)
       .minScalarOrElt(0, s8) // Worst case, we need at least s8.
+      .moreElementsToNextPow2(1)
       .clampMaxNumElements(1, s64, 2)
       .clampMaxNumElements(1, s32, 4)
       .clampMaxNumElements(1, s16, 8)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir
index e9b3aa0a3a8fd8..b590f8bfd6c111 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir
@@ -56,7 +56,9 @@ body:             |
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1:
   ; CHECK-NEXT:   successors:
-  ; CHECK: bb.2:
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
   ; CHECK-NEXT:   RET_ReallyLR
   bb.1:
     %0:_(s128) = G_IMPLICIT_DEF
@@ -93,7 +95,9 @@ body:             |
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1:
   ; CHECK-NEXT:   successors:
-  ; CHECK: bb.2:
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
   ; CHECK-NEXT:   RET_ReallyLR
   bb.1:
     %lhs:_(s128) = G_IMPLICIT_DEF
@@ -132,7 +136,9 @@ body:             |
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1:
   ; CHECK-NEXT:   successors:
-  ; CHECK: bb.2:
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
   ; CHECK-NEXT:   RET_ReallyLR
   bb.1:
     %lhs:_(s88) = G_IMPLICIT_DEF
@@ -171,7 +177,9 @@ body:             |
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1:
   ; CHECK-NEXT:   successors:
-  ; CHECK: bb.2:
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
   ; CHECK-NEXT:   RET_ReallyLR
   bb.1:
     %lhs:_(s88) = G_IMPLICIT_DEF
@@ -210,7 +218,9 @@ body:             |
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1:
   ; CHECK-NEXT:   successors:
-  ; CHECK: bb.2:
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
   ; CHECK-NEXT:   RET_ReallyLR
   bb.1:
     %lhs:_(s96) = G_IMPLICIT_DEF
@@ -272,7 +282,9 @@ body:             |
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1:
   ; CHECK-NEXT:   successors:
-  ; CHECK: bb.2:
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
   ; CHECK-NEXT:   RET_ReallyLR
   bb.1:
     %lhs:_(s318) = G_IMPLICIT_DEF
@@ -318,7 +330,9 @@ body:             |
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1:
   ; CHECK-NEXT:   successors:
-  ; CHECK: bb.2:
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
   ; CHECK-NEXT:   RET_ReallyLR
   bb.1:
     %lhs:_(s158) = G_IMPLICIT_DEF
@@ -330,3 +344,29 @@ body:             |
     successors:
   bb.3:
     RET_ReallyLR
+...
+---
+name:            test_3xs32_eq_pr_78181
+tracksRegLiveness: true
+body:             |
+  bb.1:
+    liveins: $x0
+    ; CHECK-LABEL: name: test_3xs32_eq_pr_78181
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %const:_(s32) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR %const(s32), %const(s32), %const(s32), %const(s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR %const(s32), %const(s32), %const(s32), %const(s32)
+    ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<4 x s32>) = G_ICMP intpred(eq), [[BUILD_VECTOR]](<4 x s32>), [[BUILD_VECTOR1]]
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[ICMP]](<4 x s32>), [[C]](s64)
+    ; CHECK-NEXT: $w0 = COPY [[EVEC]](s32)
+    ; CHECK-NEXT: RET_ReallyLR
+    %const:_(s32) = G_IMPLICIT_DEF
+    %rhs:_(<3 x s32>) = G_BUILD_VECTOR %const(s32), %const(s32), %const(s32)
+    %lhs:_(<3 x s32>) = G_BUILD_VECTOR %const(s32), %const(s32), %const(s32)
+    %cmp:_(<3 x s32>) = G_ICMP intpred(eq), %lhs(<3 x s32>), %rhs
+    %1:_(s32) = G_CONSTANT i32 1
+    %2:_(s32) = G_EXTRACT_VECTOR_ELT %cmp(<3 x s32>), %1(s32)
+    $w0 = COPY %2(s32)
+    RET_ReallyLR

>From 18524de3d3eeb86b010e93322244efb80fa6763c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= <schuett at gmail.com>
Date: Tue, 16 Jan 2024 15:58:29 +0100
Subject: [PATCH 2/5] improve icmp legalization

fix tests
---
 .../AArch64/GISel/AArch64LegalizerInfo.cpp    |   8 +-
 .../AArch64/GlobalISel/legalize-cmp.mir       | 207 ++++++++++++++++--
 .../GlobalISel/legalize-shuffle-vector.mir    |  20 +-
 llvm/test/CodeGen/AArch64/arm64-vabs.ll       | 111 ++++++----
 llvm/test/CodeGen/AArch64/icmp.ll             | 156 ++++++++-----
 5 files changed, 370 insertions(+), 132 deletions(-)

diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 7357d666f1f80b..2b3078e5d17675 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -525,10 +525,10 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
           [=](const LegalityQuery &Query) { return Query.Types[1] == v2p0; }, 0,
           s64)
       .moreElementsToNextPow2(0)
-      .clampMaxNumElements(0, s64, 2)
-      .clampMaxNumElements(0, s32, 4)
-      .clampMaxNumElements(0, s16, 8)
-      .clampMaxNumElements(0, s8, 16);
+      .clampNumElements(0, v8s8, v16s8)
+      .clampNumElements(0, v4s16, v8s16)
+      .clampNumElements(0, v2s32, v4s32)
+      .clampNumElements(0, v2s64, v2s64);
 
   getActionDefinitionsBuilder(G_FCMP)
       // If we don't have full FP16 support, then scalarize the elements of
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir
index b590f8bfd6c111..542cf018a6c003 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir
@@ -56,9 +56,7 @@ body:             |
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1:
   ; CHECK-NEXT:   successors:
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.2:
+  ; CHECK: bb.2:
   ; CHECK-NEXT:   RET_ReallyLR
   bb.1:
     %0:_(s128) = G_IMPLICIT_DEF
@@ -95,9 +93,7 @@ body:             |
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1:
   ; CHECK-NEXT:   successors:
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.2:
+  ; CHECK: bb.2:
   ; CHECK-NEXT:   RET_ReallyLR
   bb.1:
     %lhs:_(s128) = G_IMPLICIT_DEF
@@ -136,9 +132,7 @@ body:             |
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1:
   ; CHECK-NEXT:   successors:
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.2:
+  ; CHECK: bb.2:
   ; CHECK-NEXT:   RET_ReallyLR
   bb.1:
     %lhs:_(s88) = G_IMPLICIT_DEF
@@ -177,9 +171,7 @@ body:             |
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1:
   ; CHECK-NEXT:   successors:
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.2:
+  ; CHECK: bb.2:
   ; CHECK-NEXT:   RET_ReallyLR
   bb.1:
     %lhs:_(s88) = G_IMPLICIT_DEF
@@ -218,9 +210,7 @@ body:             |
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1:
   ; CHECK-NEXT:   successors:
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.2:
+  ; CHECK: bb.2:
   ; CHECK-NEXT:   RET_ReallyLR
   bb.1:
     %lhs:_(s96) = G_IMPLICIT_DEF
@@ -282,9 +272,7 @@ body:             |
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1:
   ; CHECK-NEXT:   successors:
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.2:
+  ; CHECK: bb.2:
   ; CHECK-NEXT:   RET_ReallyLR
   bb.1:
     %lhs:_(s318) = G_IMPLICIT_DEF
@@ -330,9 +318,7 @@ body:             |
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1:
   ; CHECK-NEXT:   successors:
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT: bb.2:
+  ; CHECK: bb.2:
   ; CHECK-NEXT:   RET_ReallyLR
   bb.1:
     %lhs:_(s158) = G_IMPLICIT_DEF
@@ -370,3 +356,182 @@ body:             |
     %2:_(s32) = G_EXTRACT_VECTOR_ELT %cmp(<3 x s32>), %1(s32)
     $w0 = COPY %2(s32)
     RET_ReallyLR
+...
+---
+name:            test_3xs16_eq_pr_78181
+tracksRegLiveness: true
+body:             |
+  bb.1:
+    liveins: $x0
+    ; CHECK-LABEL: name: test_3xs16_eq_pr_78181
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %const:_(s16) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR %const(s16), %const(s16), %const(s16), %const(s16)
+    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR %const(s16), %const(s16), %const(s16), %const(s16)
+    ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<4 x s16>) = G_ICMP intpred(eq), [[BUILD_VECTOR]](<4 x s16>), [[BUILD_VECTOR1]]
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[ICMP]](<4 x s16>)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s16)
+    ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s16)
+    ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s16)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[DEF]](s32)
+    ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR2]](<4 x s32>), [[C]](s64)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+    ; CHECK-NEXT: %zext:_(s32) = G_AND [[EVEC]], [[C1]]
+    ; CHECK-NEXT: $w0 = COPY %zext(s32)
+    ; CHECK-NEXT: RET_ReallyLR
+    %const:_(s16) = G_IMPLICIT_DEF
+    %rhs:_(<3 x s16>) = G_BUILD_VECTOR %const(s16), %const(s16), %const(s16)
+    %lhs:_(<3 x s16>) = G_BUILD_VECTOR %const(s16), %const(s16), %const(s16)
+    %cmp:_(<3 x s16>) = G_ICMP intpred(eq), %lhs(<3 x s16>), %rhs
+    %1:_(s32) = G_CONSTANT i32 1
+    %2:_(s16) = G_EXTRACT_VECTOR_ELT %cmp(<3 x s16>), %1(s32)
+    %zext:_(s32) = G_ZEXT %2(s16)
+    $w0 = COPY %zext(s32)
+    RET_ReallyLR
+...
+---
+name:            test_3xs8_eq_pr_78181
+tracksRegLiveness: true
+body:             |
+  bb.1:
+    liveins: $x0
+    ; CHECK-LABEL: name: test_3xs8_eq_pr_78181
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %const:_(s8) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8)
+    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8)
+    ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<8 x s8>) = G_ICMP intpred(eq), [[BUILD_VECTOR]](<8 x s8>), [[BUILD_VECTOR1]]
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<4 x s8>), [[UV1:%[0-9]+]]:_(<4 x s8>) = G_UNMERGE_VALUES [[ICMP]](<8 x s8>)
+    ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[UV]](<4 x s8>)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8)
+    ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s8)
+    ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s8)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[DEF]](s32)
+    ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR2]](<4 x s32>), [[C]](s64)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    ; CHECK-NEXT: %zext:_(s32) = G_AND [[EVEC]], [[C1]]
+    ; CHECK-NEXT: $w0 = COPY %zext(s32)
+    ; CHECK-NEXT: RET_ReallyLR
+    %const:_(s8) = G_IMPLICIT_DEF
+    %rhs:_(<3 x s8>) = G_BUILD_VECTOR %const(s8), %const(s8), %const(s8)
+    %lhs:_(<3 x s8>) = G_BUILD_VECTOR %const(s8), %const(s8), %const(s8)
+    %cmp:_(<3 x s8>) = G_ICMP intpred(eq), %lhs(<3 x s8>), %rhs
+    %1:_(s32) = G_CONSTANT i32 1
+    %2:_(s8) = G_EXTRACT_VECTOR_ELT %cmp(<3 x s8>), %1(s32)
+    %zext:_(s32) = G_ZEXT %2(s8)
+    $w0 = COPY %zext(s32)
+    RET_ReallyLR
+...
+---
+name:            test_3xs64_eq_clamp
+tracksRegLiveness: true
+body:             |
+  bb.1:
+    liveins: $x0
+    ; CHECK-LABEL: name: test_3xs64_eq_clamp
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %const:_(s64) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR %const(s64), %const(s64)
+    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR %const(s64), %const(s64)
+    ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<2 x s64>) = G_ICMP intpred(eq), [[BUILD_VECTOR]](<2 x s64>), [[BUILD_VECTOR1]]
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[ICMP]](<2 x s64>), [[C]](s64)
+    ; CHECK-NEXT: $x0 = COPY [[EVEC]](s64)
+    ; CHECK-NEXT: RET_ReallyLR
+    %const:_(s64) = G_IMPLICIT_DEF
+    %rhs:_(<3 x s64>) = G_BUILD_VECTOR %const(s64), %const(s64), %const(s64)
+    %lhs:_(<3 x s64>) = G_BUILD_VECTOR %const(s64), %const(s64), %const(s64)
+    %cmp:_(<3 x s64>) = G_ICMP intpred(eq), %lhs(<3 x s64>), %rhs
+    %1:_(s32) = G_CONSTANT i32 1
+    %2:_(s64) = G_EXTRACT_VECTOR_ELT %cmp(<3 x s64>), %1(s32)
+    $x0 = COPY %2(s64)
+    RET_ReallyLR
+...
+---
+name:            test_5xs32_eq_clamp
+tracksRegLiveness: true
+body:             |
+  bb.1:
+    liveins: $x0
+    ; CHECK-LABEL: name: test_5xs32_eq_clamp
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %const:_(s32) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR %const(s32), %const(s32), %const(s32), %const(s32)
+    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR %const(s32), %const(s32), %const(s32), %const(s32)
+    ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<4 x s32>) = G_ICMP intpred(eq), [[BUILD_VECTOR]](<4 x s32>), [[BUILD_VECTOR1]]
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[ICMP]](<4 x s32>), [[C]](s64)
+    ; CHECK-NEXT: $w0 = COPY [[EVEC]](s32)
+    ; CHECK-NEXT: RET_ReallyLR
+    %const:_(s32) = G_IMPLICIT_DEF
+    %rhs:_(<5 x s32>) = G_BUILD_VECTOR %const(s32), %const(s32), %const(s32), %const(s32), %const(s32)
+    %lhs:_(<5 x s32>) = G_BUILD_VECTOR %const(s32), %const(s32), %const(s32), %const(s32), %const(s32)
+    %cmp:_(<5 x s32>) = G_ICMP intpred(eq), %lhs(<5 x s32>), %rhs
+    %1:_(s32) = G_CONSTANT i32 1
+    %2:_(s32) = G_EXTRACT_VECTOR_ELT %cmp(<5 x s32>), %1(s32)
+    $w0 = COPY %2(s32)
+    RET_ReallyLR
+...
+---
+name:            test_7xs16_eq_clamp
+tracksRegLiveness: true
+body:             |
+  bb.1:
+    liveins: $x0
+    ; CHECK-LABEL: name: test_7xs16_eq_clamp
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %const:_(s16) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR %const(s16), %const(s16), %const(s16), %const(s16), %const(s16), %const(s16), %const(s16), %const(s16)
+    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR %const(s16), %const(s16), %const(s16), %const(s16), %const(s16), %const(s16), %const(s16), %const(s16)
+    ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<8 x s16>) = G_ICMP intpred(eq), [[BUILD_VECTOR]](<8 x s16>), [[BUILD_VECTOR1]]
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s16) = G_EXTRACT_VECTOR_ELT [[ICMP]](<8 x s16>), [[C]](s64)
+    ; CHECK-NEXT: %zext:_(s32) = G_ZEXT [[EVEC]](s16)
+    ; CHECK-NEXT: $w0 = COPY %zext(s32)
+    ; CHECK-NEXT: RET_ReallyLR
+    %const:_(s16) = G_IMPLICIT_DEF
+    %rhs:_(<7 x s16>) = G_BUILD_VECTOR %const(s16), %const(s16), %const(s16), %const(s16), %const(s16), %const(s16), %const(s16)
+    %lhs:_(<7 x s16>) = G_BUILD_VECTOR %const(s16), %const(s16), %const(s16), %const(s16), %const(s16), %const(s16), %const(s16)
+    %cmp:_(<7 x s16>) = G_ICMP intpred(eq), %lhs(<7 x s16>), %rhs
+    %1:_(s32) = G_CONSTANT i32 1
+    %2:_(s16) = G_EXTRACT_VECTOR_ELT %cmp(<7 x s16>), %1(s32)
+    %zext:_(s32) = G_ZEXT %2(s16)
+    $w0 = COPY %zext(s32)
+    RET_ReallyLR
+...
+---
+name:            test_9xs8_eq_clamp
+tracksRegLiveness: true
+body:             |
+  bb.1:
+    liveins: $x0
+    ; CHECK-LABEL: name: test_9xs8_eq_clamp
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %const:_(s8) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8)
+    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8)
+    ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<16 x s8>) = G_ICMP intpred(eq), [[BUILD_VECTOR]](<16 x s8>), [[BUILD_VECTOR1]]
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[ICMP]](<16 x s8>), [[C]](s64)
+    ; CHECK-NEXT: %zext:_(s32) = G_ZEXT [[EVEC]](s8)
+    ; CHECK-NEXT: $w0 = COPY %zext(s32)
+    ; CHECK-NEXT: RET_ReallyLR
+    %const:_(s8) = G_IMPLICIT_DEF
+    %rhs:_(<9 x s8>) = G_BUILD_VECTOR %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8)
+    %lhs:_(<9 x s8>) = G_BUILD_VECTOR %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8), %const(s8)
+    %cmp:_(<9 x s8>) = G_ICMP intpred(eq), %lhs(<9 x s8>), %rhs
+    %1:_(s32) = G_CONSTANT i32 1
+    %2:_(s8) = G_EXTRACT_VECTOR_ELT %cmp(<9 x s8>), %1(s32)
+    %zext:_(s32) = G_ZEXT %2(s8)
+    $w0 = COPY %zext(s32)
+    RET_ReallyLR
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir
index 07946388590e29..2cfee7bcc462a2 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir
@@ -316,26 +316,14 @@ body:             |
     ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[DEF]](s32)
     ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[DEF]](s32)
     ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[BUILD_VECTOR]](<4 x s32>), [[BUILD_VECTOR1]], shufflemask(0, 1, 5, 6)
-    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[SHUF]](<4 x s32>)
-    ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s32>), [[UV3:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[SHUF]](<4 x s32>)
-    ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<2 x s32>), [[UV5:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[SHUF]](<4 x s32>)
-    ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s32>), [[UV7:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[SHUF]](<4 x s32>)
-    ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s32>), [[UV9:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[SHUF]](<4 x s32>)
-    ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(<2 x s32>), [[UV11:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[SHUF]](<4 x s32>)
-    ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(<2 x s32>), [[UV13:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[SHUF]](<4 x s32>)
-    ; CHECK-NEXT: [[UV14:%[0-9]+]]:_(<2 x s32>), [[UV15:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[SHUF]](<4 x s32>)
     ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
-    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[UV]](<2 x s32>), [[UV3]](<2 x s32>)
-    ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[CONCAT_VECTORS]](<4 x s32>), [[C]](s64)
+    ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[SHUF]](<4 x s32>), [[C]](s64)
     ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[UV4]](<2 x s32>), [[UV7]](<2 x s32>)
-    ; CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[CONCAT_VECTORS1]](<4 x s32>), [[C1]](s64)
+    ; CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[SHUF]](<4 x s32>), [[C1]](s64)
     ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[UV8]](<2 x s32>), [[UV11]](<2 x s32>)
-    ; CHECK-NEXT: [[EVEC2:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[CONCAT_VECTORS2]](<4 x s32>), [[C2]](s64)
+    ; CHECK-NEXT: [[EVEC2:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[SHUF]](<4 x s32>), [[C2]](s64)
     ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; CHECK-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[UV12]](<2 x s32>), [[UV15]](<2 x s32>)
-    ; CHECK-NEXT: [[EVEC3:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[CONCAT_VECTORS3]](<4 x s32>), [[C3]](s64)
+    ; CHECK-NEXT: [[EVEC3:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[SHUF]](<4 x s32>), [[C3]](s64)
     ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[EVEC]](s32), [[EVEC1]](s32), [[EVEC2]](s32), [[EVEC3]](s32)
     ; CHECK-NEXT: $q0 = COPY [[BUILD_VECTOR2]](<4 x s32>)
     ; CHECK-NEXT: RET_ReallyLR implicit $q0
diff --git a/llvm/test/CodeGen/AArch64/arm64-vabs.ll b/llvm/test/CodeGen/AArch64/arm64-vabs.ll
index 7c71449a316338..e452d488885a58 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vabs.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vabs.ll
@@ -2,21 +2,6 @@
 ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck -check-prefixes=CHECK,CHECK-SD %s
 ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
 
-; CHECK-GI:       warning: Instruction selection used fallback path for uabd16b_rdx
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for uabd4s_rdx
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for sabd4s_rdx
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for abs_8b
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for abs_16b
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for abs_4h
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for abs_8h
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for abs_2s
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for abs_4s
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for abs_1d
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for abs_1d_honestly
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fabds
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for fabdd
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for uabd_i64
-
 define <8 x i16> @sabdl8h(ptr %A, ptr %B) nounwind {
 ; CHECK-LABEL: sabdl8h:
 ; CHECK:       // %bb.0:
@@ -244,14 +229,32 @@ declare i16 @llvm.vector.reduce.add.v16i16(<16 x i16>)
 declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>)
 
 define i16 @uabd16b_rdx(ptr %a, ptr %b) {
-; CHECK-LABEL: uabd16b_rdx:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldr q0, [x0]
-; CHECK-NEXT:    ldr q1, [x1]
-; CHECK-NEXT:    uabd.16b v0, v0, v1
-; CHECK-NEXT:    uaddlv.16b h0, v0
-; CHECK-NEXT:    fmov w0, s0
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: uabd16b_rdx:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    ldr q0, [x0]
+; CHECK-SD-NEXT:    ldr q1, [x1]
+; CHECK-SD-NEXT:    uabd.16b v0, v0, v1
+; CHECK-SD-NEXT:    uaddlv.16b h0, v0
+; CHECK-SD-NEXT:    fmov w0, s0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: uabd16b_rdx:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    ldr q1, [x0]
+; CHECK-GI-NEXT:    ldr q2, [x1]
+; CHECK-GI-NEXT:    movi.2d v0, #0000000000000000
+; CHECK-GI-NEXT:    usubl.8h v3, v1, v2
+; CHECK-GI-NEXT:    usubl2.8h v1, v1, v2
+; CHECK-GI-NEXT:    neg.8h v2, v3
+; CHECK-GI-NEXT:    neg.8h v4, v1
+; CHECK-GI-NEXT:    cmgt.8h v5, v0, v3
+; CHECK-GI-NEXT:    cmgt.8h v0, v0, v1
+; CHECK-GI-NEXT:    bif.16b v2, v3, v5
+; CHECK-GI-NEXT:    bsl.16b v0, v4, v1
+; CHECK-GI-NEXT:    add.8h v0, v2, v0
+; CHECK-GI-NEXT:    addv.8h h0, v0
+; CHECK-GI-NEXT:    fmov w0, s0
+; CHECK-GI-NEXT:    ret
   %aload = load <16 x i8>, ptr %a, align 1
   %bload = load <16 x i8>, ptr %b, align 1
   %aext = zext <16 x i8> %aload to <16 x i16>
@@ -468,14 +471,32 @@ declare i64 @llvm.vector.reduce.add.v4i64(<4 x i64>)
 declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>)
 
 define i64 @uabd4s_rdx(ptr %a, ptr %b, i32 %h) {
-; CHECK-LABEL: uabd4s_rdx:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldr q0, [x0]
-; CHECK-NEXT:    ldr q1, [x1]
-; CHECK-NEXT:    uabd.4s v0, v0, v1
-; CHECK-NEXT:    uaddlv.4s d0, v0
-; CHECK-NEXT:    fmov x0, d0
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: uabd4s_rdx:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    ldr q0, [x0]
+; CHECK-SD-NEXT:    ldr q1, [x1]
+; CHECK-SD-NEXT:    uabd.4s v0, v0, v1
+; CHECK-SD-NEXT:    uaddlv.4s d0, v0
+; CHECK-SD-NEXT:    fmov x0, d0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: uabd4s_rdx:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    ldr q1, [x0]
+; CHECK-GI-NEXT:    ldr q2, [x1]
+; CHECK-GI-NEXT:    movi.2d v0, #0000000000000000
+; CHECK-GI-NEXT:    usubl.2d v3, v1, v2
+; CHECK-GI-NEXT:    usubl2.2d v1, v1, v2
+; CHECK-GI-NEXT:    neg.2d v2, v3
+; CHECK-GI-NEXT:    neg.2d v4, v1
+; CHECK-GI-NEXT:    cmgt.2d v5, v0, v3
+; CHECK-GI-NEXT:    cmgt.2d v0, v0, v1
+; CHECK-GI-NEXT:    bif.16b v2, v3, v5
+; CHECK-GI-NEXT:    bsl.16b v0, v4, v1
+; CHECK-GI-NEXT:    add.2d v0, v2, v0
+; CHECK-GI-NEXT:    addp.2d d0, v0
+; CHECK-GI-NEXT:    fmov x0, d0
+; CHECK-GI-NEXT:    ret
   %aload = load <4 x i32>, ptr %a, align 1
   %bload = load <4 x i32>, ptr %b, align 1
   %aext = zext <4 x i32> %aload to <4 x i64>
@@ -489,12 +510,28 @@ define i64 @uabd4s_rdx(ptr %a, ptr %b, i32 %h) {
 }
 
 define i64 @sabd4s_rdx(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: sabd4s_rdx:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    sabd.4s v0, v0, v1
-; CHECK-NEXT:    uaddlv.4s d0, v0
-; CHECK-NEXT:    fmov x0, d0
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: sabd4s_rdx:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    sabd.4s v0, v0, v1
+; CHECK-SD-NEXT:    uaddlv.4s d0, v0
+; CHECK-SD-NEXT:    fmov x0, d0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: sabd4s_rdx:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    ssubl.2d v3, v0, v1
+; CHECK-GI-NEXT:    ssubl2.2d v0, v0, v1
+; CHECK-GI-NEXT:    movi.2d v2, #0000000000000000
+; CHECK-GI-NEXT:    neg.2d v1, v3
+; CHECK-GI-NEXT:    neg.2d v4, v0
+; CHECK-GI-NEXT:    cmgt.2d v5, v2, v3
+; CHECK-GI-NEXT:    cmgt.2d v2, v2, v0
+; CHECK-GI-NEXT:    bif.16b v1, v3, v5
+; CHECK-GI-NEXT:    bit.16b v0, v4, v2
+; CHECK-GI-NEXT:    add.2d v0, v1, v0
+; CHECK-GI-NEXT:    addp.2d d0, v0
+; CHECK-GI-NEXT:    fmov x0, d0
+; CHECK-GI-NEXT:    ret
   %aext = sext <4 x i32> %a to <4 x i64>
   %bext = sext <4 x i32> %b to <4 x i64>
   %abdiff = sub nsw <4 x i64> %aext, %bext
diff --git a/llvm/test/CodeGen/AArch64/icmp.ll b/llvm/test/CodeGen/AArch64/icmp.ll
index d2b44bb5e3f9f1..d0a7f67a723523 100644
--- a/llvm/test/CodeGen/AArch64/icmp.ll
+++ b/llvm/test/CodeGen/AArch64/icmp.ll
@@ -2,12 +2,6 @@
 ; RUN: llc -mtriple=aarch64-none-eabi -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
 ; RUN: llc -mtriple=aarch64-none-eabi -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
 
-; CHECK-GI:       warning: Instruction selection used fallback path for v3i64_i64
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for v4i64_i64
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for v3i32_i32
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for v16i16_i16
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for v32i8_i8
-
 define i64 @i64_i64(i64 %a, i64 %b, i64 %d, i64 %e) {
 ; CHECK-LABEL: i64_i64:
 ; CHECK:       // %bb.0: // %entry
@@ -71,33 +65,63 @@ entry:
 }
 
 define <3 x i64> @v3i64_i64(<3 x i64> %a, <3 x i64> %b, <3 x i64> %d, <3 x i64> %e) {
-; CHECK-LABEL: v3i64_i64:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $d4 killed $d4 def $q4
-; CHECK-NEXT:    // kill: def $d3 killed $d3 def $q3
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    // kill: def $d6 killed $d6 def $q6
-; CHECK-NEXT:    // kill: def $d7 killed $d7 def $q7
-; CHECK-NEXT:    // kill: def $d5 killed $d5 def $q5
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT:    ldr d16, [sp, #24]
-; CHECK-NEXT:    ldr d17, [sp]
-; CHECK-NEXT:    mov v3.d[1], v4.d[0]
-; CHECK-NEXT:    mov v0.d[1], v1.d[0]
-; CHECK-NEXT:    mov v6.d[1], v7.d[0]
-; CHECK-NEXT:    ldp d1, d4, [sp, #8]
-; CHECK-NEXT:    mov v1.d[1], v4.d[0]
-; CHECK-NEXT:    cmgt v0.2d, v3.2d, v0.2d
-; CHECK-NEXT:    bsl v0.16b, v6.16b, v1.16b
-; CHECK-NEXT:    cmgt v1.2d, v5.2d, v2.2d
-; CHECK-NEXT:    mov v2.16b, v1.16b
-; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $q1
-; CHECK-NEXT:    bsl v2.16b, v17.16b, v16.16b
-; CHECK-NEXT:    // kill: def $d2 killed $d2 killed $q2
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: v3i64_i64:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d4 killed $d4 def $q4
+; CHECK-SD-NEXT:    // kill: def $d3 killed $d3 def $q3
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d6 killed $d6 def $q6
+; CHECK-SD-NEXT:    // kill: def $d7 killed $d7 def $q7
+; CHECK-SD-NEXT:    // kill: def $d5 killed $d5 def $q5
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT:    ldr d16, [sp, #24]
+; CHECK-SD-NEXT:    ldr d17, [sp]
+; CHECK-SD-NEXT:    mov v3.d[1], v4.d[0]
+; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-SD-NEXT:    mov v6.d[1], v7.d[0]
+; CHECK-SD-NEXT:    ldp d1, d4, [sp, #8]
+; CHECK-SD-NEXT:    mov v1.d[1], v4.d[0]
+; CHECK-SD-NEXT:    cmgt v0.2d, v3.2d, v0.2d
+; CHECK-SD-NEXT:    bsl v0.16b, v6.16b, v1.16b
+; CHECK-SD-NEXT:    cmgt v1.2d, v5.2d, v2.2d
+; CHECK-SD-NEXT:    mov v2.16b, v1.16b
+; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
+; CHECK-SD-NEXT:    bsl v2.16b, v17.16b, v16.16b
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: v3i64_i64:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    // kill: def $d3 killed $d3 def $q3
+; CHECK-GI-NEXT:    // kill: def $d4 killed $d4 def $q4
+; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-GI-NEXT:    // kill: def $d6 killed $d6 def $q6
+; CHECK-GI-NEXT:    // kill: def $d5 killed $d5 def $q5
+; CHECK-GI-NEXT:    // kill: def $d7 killed $d7 def $q7
+; CHECK-GI-NEXT:    ldr x8, [sp]
+; CHECK-GI-NEXT:    ldr x10, [sp, #24]
+; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT:    mov v3.d[1], v4.d[0]
+; CHECK-GI-NEXT:    cmgt v2.2d, v5.2d, v2.2d
+; CHECK-GI-NEXT:    ldp d1, d4, [sp, #8]
+; CHECK-GI-NEXT:    mov v6.d[1], v7.d[0]
+; CHECK-GI-NEXT:    fmov x9, d2
+; CHECK-GI-NEXT:    mov v1.d[1], v4.d[0]
+; CHECK-GI-NEXT:    cmgt v0.2d, v3.2d, v0.2d
+; CHECK-GI-NEXT:    sbfx x9, x9, #0, #1
+; CHECK-GI-NEXT:    bsl v0.16b, v6.16b, v1.16b
+; CHECK-GI-NEXT:    and x8, x8, x9
+; CHECK-GI-NEXT:    bic x9, x10, x9
+; CHECK-GI-NEXT:    orr x8, x8, x9
+; CHECK-GI-NEXT:    fmov d2, x8
+; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    ret
 entry:
   %c = icmp slt <3 x i64> %a, %b
   %s = select <3 x i1> %c, <3 x i64> %d, <3 x i64> %e
@@ -105,13 +129,21 @@ entry:
 }
 
 define <4 x i64> @v4i64_i64(<4 x i64> %a, <4 x i64> %b, <4 x i64> %d, <4 x i64> %e) {
-; CHECK-LABEL: v4i64_i64:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    cmgt v1.2d, v3.2d, v1.2d
-; CHECK-NEXT:    cmgt v0.2d, v2.2d, v0.2d
-; CHECK-NEXT:    bsl v1.16b, v5.16b, v7.16b
-; CHECK-NEXT:    bsl v0.16b, v4.16b, v6.16b
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: v4i64_i64:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    cmgt v1.2d, v3.2d, v1.2d
+; CHECK-SD-NEXT:    cmgt v0.2d, v2.2d, v0.2d
+; CHECK-SD-NEXT:    bsl v1.16b, v5.16b, v7.16b
+; CHECK-SD-NEXT:    bsl v0.16b, v4.16b, v6.16b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: v4i64_i64:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    cmgt v0.2d, v2.2d, v0.2d
+; CHECK-GI-NEXT:    cmgt v1.2d, v3.2d, v1.2d
+; CHECK-GI-NEXT:    bsl v0.16b, v4.16b, v6.16b
+; CHECK-GI-NEXT:    bsl v1.16b, v5.16b, v7.16b
+; CHECK-GI-NEXT:    ret
 entry:
   %c = icmp slt <4 x i64> %a, %b
   %s = select <4 x i1> %c, <4 x i64> %d, <4 x i64> %e
@@ -201,13 +233,21 @@ entry:
 }
 
 define <16 x i16> @v16i16_i16(<16 x i16> %a, <16 x i16> %b, <16 x i16> %d, <16 x i16> %e) {
-; CHECK-LABEL: v16i16_i16:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    cmgt v1.8h, v3.8h, v1.8h
-; CHECK-NEXT:    cmgt v0.8h, v2.8h, v0.8h
-; CHECK-NEXT:    bsl v1.16b, v5.16b, v7.16b
-; CHECK-NEXT:    bsl v0.16b, v4.16b, v6.16b
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: v16i16_i16:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    cmgt v1.8h, v3.8h, v1.8h
+; CHECK-SD-NEXT:    cmgt v0.8h, v2.8h, v0.8h
+; CHECK-SD-NEXT:    bsl v1.16b, v5.16b, v7.16b
+; CHECK-SD-NEXT:    bsl v0.16b, v4.16b, v6.16b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: v16i16_i16:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    cmgt v0.8h, v2.8h, v0.8h
+; CHECK-GI-NEXT:    cmgt v1.8h, v3.8h, v1.8h
+; CHECK-GI-NEXT:    bsl v0.16b, v4.16b, v6.16b
+; CHECK-GI-NEXT:    bsl v1.16b, v5.16b, v7.16b
+; CHECK-GI-NEXT:    ret
 entry:
   %c = icmp slt <16 x i16> %a, %b
   %s = select <16 x i1> %c, <16 x i16> %d, <16 x i16> %e
@@ -239,13 +279,21 @@ entry:
 }
 
 define <32 x i8> @v32i8_i8(<32 x i8> %a, <32 x i8> %b, <32 x i8> %d, <32 x i8> %e) {
-; CHECK-LABEL: v32i8_i8:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    cmgt v1.16b, v3.16b, v1.16b
-; CHECK-NEXT:    cmgt v0.16b, v2.16b, v0.16b
-; CHECK-NEXT:    bsl v1.16b, v5.16b, v7.16b
-; CHECK-NEXT:    bsl v0.16b, v4.16b, v6.16b
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: v32i8_i8:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    cmgt v1.16b, v3.16b, v1.16b
+; CHECK-SD-NEXT:    cmgt v0.16b, v2.16b, v0.16b
+; CHECK-SD-NEXT:    bsl v1.16b, v5.16b, v7.16b
+; CHECK-SD-NEXT:    bsl v0.16b, v4.16b, v6.16b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: v32i8_i8:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    cmgt v0.16b, v2.16b, v0.16b
+; CHECK-GI-NEXT:    cmgt v1.16b, v3.16b, v1.16b
+; CHECK-GI-NEXT:    bsl v0.16b, v4.16b, v6.16b
+; CHECK-GI-NEXT:    bsl v1.16b, v5.16b, v7.16b
+; CHECK-GI-NEXT:    ret
 entry:
   %c = icmp slt <32 x i8> %a, %b
   %s = select <32 x i1> %c, <32 x i8> %d, <32 x i8> %e

>From e3021e279ffd401dc1f692ebbc17084483ba6ffc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= <schuett at gmail.com>
Date: Tue, 16 Jan 2024 17:41:38 +0100
Subject: [PATCH 3/5] add todo to moreElements

---
 llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 22df0fd34c057b..560f8d2562c040 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -5315,6 +5315,9 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
     return Legalized;
   }
   case TargetOpcode::G_ICMP: {
+    // TODO: the symmetric MoreTy works for targets like, e.g. NEON.
+    // For targets, like e.g. MVE, the result is a predicated vector (i1).
+    // This will some refactoring.
     Observer.changingInstr(MI);
     moreElementsVectorSrc(MI, MoreTy, 2);
     moreElementsVectorSrc(MI, MoreTy, 3);

>From da1d8e196b3accd752870f93e1a2633508a7debd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= <schuett at gmail.com>
Date: Tue, 16 Jan 2024 18:33:54 +0100
Subject: [PATCH 4/5] fix typo

---
 llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 560f8d2562c040..a868860f343ba7 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -5317,7 +5317,7 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
   case TargetOpcode::G_ICMP: {
     // TODO: the symmetric MoreTy works for targets like, e.g. NEON.
     // For targets, like e.g. MVE, the result is a predicated vector (i1).
-    // This will some refactoring.
+    // This will need some refactoring.
     Observer.changingInstr(MI);
     moreElementsVectorSrc(MI, MoreTy, 2);
     moreElementsVectorSrc(MI, MoreTy, 3);

>From 4d6c6a20a975d1d830e66c743806c83a56d71d80 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= <schuett at gmail.com>
Date: Tue, 16 Jan 2024 20:27:17 +0100
Subject: [PATCH 5/5] document fallback in icmp.ll

---
 llvm/test/CodeGen/AArch64/icmp.ll | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/llvm/test/CodeGen/AArch64/icmp.ll b/llvm/test/CodeGen/AArch64/icmp.ll
index d0a7f67a723523..26711ea584c977 100644
--- a/llvm/test/CodeGen/AArch64/icmp.ll
+++ b/llvm/test/CodeGen/AArch64/icmp.ll
@@ -2,6 +2,8 @@
 ; RUN: llc -mtriple=aarch64-none-eabi -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
 ; RUN: llc -mtriple=aarch64-none-eabi -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
 
+; CHECK-GI:  warning: Instruction selection used fallback path for v3i32_i32
+
 define i64 @i64_i64(i64 %a, i64 %b, i64 %d, i64 %e) {
 ; CHECK-LABEL: i64_i64:
 ; CHECK:       // %bb.0: // %entry



More information about the llvm-commits mailing list