[llvm] [GlobalIsel] Combine freeze (PR #93239)

Thorsten Schütt via llvm-commits llvm-commits at lists.llvm.org
Sat May 25 08:41:17 PDT 2024


https://github.com/tschuett updated https://github.com/llvm/llvm-project/pull/93239

>From 95486e7c020c3d368c9a58f6b98fe29f7c449171 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= <schuett at gmail.com>
Date: Thu, 23 May 2024 18:40:55 +0200
Subject: [PATCH 1/6] [GlobalIsel] Combine freeze

---
 .../include/llvm/Target/GlobalISel/Combine.td |  16 ++-
 llvm/lib/CodeGen/GlobalISel/Utils.cpp         | 102 ++++++++++++++++--
 2 files changed, 105 insertions(+), 13 deletions(-)

diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 8012f91922777..40a6d69f7e372 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1713,6 +1713,17 @@ def integer_reassoc_combines: GICombineGroup<[
   APlusBMinusCPlusA
 ]>;
 
+def freeze_of_not_undef_and_poison : GICombineRule<
+   (defs root:$root),
+   (match (G_FREEZE $root, $src),
+          [{ return isGuaranteedNotToBeUndefOrPoison(${src}.getReg(), MRI); }]),
+   (apply (GIReplaceReg $root, $src))>;
+
+def freeze_combines: GICombineGroup<[
+  freeze_of_not_undef_and_poison,
+  push_freeze_to_prevent_poison_from_propagating
+]>;
+
 // FIXME: These should use the custom predicate feature once it lands.
 def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
                                      undef_to_negative_one,
@@ -1771,7 +1782,7 @@ def constant_fold_binops : GICombineGroup<[constant_fold_binop,
                                            constant_fold_fp_binop]>;
 
 def all_combines : GICombineGroup<[integer_reassoc_combines, trivial_combines,
-    vector_ops_combines,
+    vector_ops_combines, freeze_combines,
     insert_vec_elt_combines, extract_vec_elt_combines, combines_for_extload,
     combine_extracted_vector_load,
     undef_combines, identity_combines, phi_combines,
@@ -1793,8 +1804,7 @@ def all_combines : GICombineGroup<[integer_reassoc_combines, trivial_combines,
     sub_add_reg, select_to_minmax, redundant_binop_in_equality,
     fsub_to_fneg, commute_constant_to_rhs, match_ands, match_ors,
     combine_concat_vector, double_icmp_zero_and_or_combine, match_addos,
-    sext_trunc, zext_trunc, combine_shuffle_concat,
-    push_freeze_to_prevent_poison_from_propagating]>;
+    sext_trunc, zext_trunc, combine_shuffle_concat]>;
 
 // A combine group used to for prelegalizer combiners at -O0. The combines in
 // this group have been selected based on experiments to balance code size and
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index f455482e02943..93a76fde6ab27 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -1724,6 +1724,39 @@ bool llvm::isPreISelGenericFloatingPointOpcode(unsigned Opc) {
   }
 }
 
+/// Shifts return poison if the shift amount is greater than or equal to the bitwidth.
+static bool shiftAmountKnownInRange(Register ShiftAmount,
+                                    const MachineRegisterInfo &MRI) {
+  LLT Ty = MRI.getType(ShiftAmount);
+
+  if (Ty.isScalableVector())
+    return false; // Can't tell, just return false to be safe
+
+  if (Ty.isScalar()) {
+    std::optional<ValueAndVReg> Val =
+        getIConstantVRegValWithLookThrough(ShiftAmount, MRI);
+    if (!Val)
+      return false;
+    return Val->Value.ult(Ty.getScalarSizeInBits());
+  }
+
+  GBuildVector *BV = getOpcodeDef<GBuildVector>(ShiftAmount, MRI);
+  if (!BV)
+    return false;
+
+  unsigned Sources = BV->getNumSources();
+  for (unsigned I = 0; I < Sources; ++I) {
+    std::optional<ValueAndVReg> Val =
+        getIConstantVRegValWithLookThrough(BV->getSourceReg(I), MRI);
+    if (!Val)
+      return false;
+    if (!Val->Value.ult(Ty.getScalarSizeInBits()))
+      return false;
+  }
+
+  return true;
+}
+
 namespace {
 enum class UndefPoisonKind {
   PoisonOnly = (1 << 0),
@@ -1732,11 +1765,11 @@ enum class UndefPoisonKind {
 };
 }
 
-[[maybe_unused]] static bool includesPoison(UndefPoisonKind Kind) {
+static bool includesPoison(UndefPoisonKind Kind) {
   return (unsigned(Kind) & unsigned(UndefPoisonKind::PoisonOnly)) != 0;
 }
 
-[[maybe_unused]] static bool includesUndef(UndefPoisonKind Kind) {
+static bool includesUndef(UndefPoisonKind Kind) {
   return (unsigned(Kind) & unsigned(UndefPoisonKind::UndefOnly)) != 0;
 }
 
@@ -1745,18 +1778,55 @@ static bool canCreateUndefOrPoison(Register Reg, const MachineRegisterInfo &MRI,
                                    UndefPoisonKind Kind) {
   MachineInstr *RegDef = MRI.getVRegDef(Reg);
 
-  if (auto *GMI = dyn_cast<GenericMachineInstr>(RegDef)) {
-    if (ConsiderFlagsAndMetadata && includesPoison(Kind) &&
-        GMI->hasPoisonGeneratingFlags())
-      return true;
-  } else {
-    // Conservatively return true.
-    return true;
-  }
+  if (ConsiderFlagsAndMetadata && includesPoison(Kind))
+    if (auto *GMI = dyn_cast<GenericMachineInstr>(RegDef))
+      if (GMI->hasPoisonGeneratingFlags())
+        return true;
 
+  // Check whether opcode is a poison/undef-generating operation.
   switch (RegDef->getOpcode()) {
   case TargetOpcode::G_FREEZE:
+  case TargetOpcode::G_BUILD_VECTOR:
+  case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
     return false;
+  case TargetOpcode::G_SHL:
+  case TargetOpcode::G_ASHR:
+  case TargetOpcode::G_LSHR:
+    return includesPoison(Kind) &&
+           !shiftAmountKnownInRange(RegDef->getOperand(2).getReg(), MRI);
+  case TargetOpcode::G_FPTOSI:
+  case TargetOpcode::G_FPTOUI:
+    // fptosi/ui yields poison if the resulting value does not fit in the
+    // destination type.
+    return true;
+  case TargetOpcode::G_CTLZ:
+  case TargetOpcode::G_CTTZ:
+  case TargetOpcode::G_ABS:
+  case TargetOpcode::G_CTPOP:
+  case TargetOpcode::G_BSWAP:
+  case TargetOpcode::G_BITREVERSE:
+  case TargetOpcode::G_FSHL:
+  case TargetOpcode::G_FSHR:
+  case TargetOpcode::G_SMAX:
+  case TargetOpcode::G_SMIN:
+  case TargetOpcode::G_UMAX:
+  case TargetOpcode::G_UMIN:
+  case TargetOpcode::G_PTRMASK:
+  case TargetOpcode::G_SADDO:
+  case TargetOpcode::G_SSUBO:
+  case TargetOpcode::G_UADDO:
+  case TargetOpcode::G_USUBO:
+  case TargetOpcode::G_SMULO:
+  case TargetOpcode::G_UMULO:
+  case TargetOpcode::G_SADDSAT:
+  case TargetOpcode::G_UADDSAT:
+  case TargetOpcode::G_SSUBSAT:
+  case TargetOpcode::G_USUBSAT:
+    return false;
+  case TargetOpcode::G_SSHLSAT:
+  case TargetOpcode::G_USHLSAT:
+    return includesPoison(Kind) &&
+           !shiftAmountKnownInRange(RegDef->getOperand(2).getReg(), MRI);
   default:
     return !isa<GCastOp>(RegDef) && !isa<GBinOp>(RegDef);
   }
@@ -1776,6 +1846,18 @@ static bool isGuaranteedNotToBeUndefOrPoison(Register Reg,
     return true;
   case TargetOpcode::G_IMPLICIT_DEF:
     return !includesUndef(Kind);
+  case TargetOpcode::G_CONSTANT:
+  case TargetOpcode::G_FCONSTANT:
+    return false;
+  case TargetOpcode::G_BUILD_VECTOR: {
+    GBuildVector *BV = cast<GBuildVector>(RegDef);
+    unsigned NumSources = BV->getNumSources();
+    for (unsigned I = 0; I < NumSources; ++I)
+      if (!::isGuaranteedNotToBeUndefOrPoison(BV->getSourceReg(I), MRI,
+                                              Depth + 1, Kind))
+        return false;
+    return true;
+  }
   default: {
     auto MOCheck = [&](const MachineOperand &MO) {
       if (!MO.isReg())

>From 14c2b5d692746347cf7a100d007d7aa331f04770 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= <schuett at gmail.com>
Date: Thu, 23 May 2024 18:48:46 +0200
Subject: [PATCH 2/6] mend

---
 .../AArch64/GlobalISel/combine-freeze.mir     | 656 ++++++++++++++++++
 1 file changed, 656 insertions(+)
 create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/combine-freeze.mir

diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-freeze.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-freeze.mir
new file mode 100644
index 0000000000000..10df96bc73ed7
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-freeze.mir
@@ -0,0 +1,656 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -o - -mtriple=aarch64-unknown-unknown -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s | FileCheck %s
+
+...
+---
+name:            freeze_register
+body:             |
+  bb.1:
+    liveins: $w0
+
+    ; CHECK-LABEL: name: freeze_register
+    ; CHECK: liveins: $w0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[COPY]]
+    ; CHECK-NEXT: $x0 = COPY [[FREEZE]](s64)
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
+    %0:_(s64) = COPY $x0
+    %2:_(s64) = G_FREEZE %0
+    $x0 = COPY %2(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name:            freeze_constant
+body:             |
+  bb.1:
+    liveins: $w0
+
+    ; CHECK-LABEL: name: freeze_constant
+    ; CHECK: liveins: $w0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 9
+    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[C]]
+    ; CHECK-NEXT: $x0 = COPY [[FREEZE]](s64)
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
+    %0:_(s64) = COPY $x0
+    %1:_(s64) = G_CONSTANT i64 9
+    %2:_(s64) = G_FREEZE %1
+    $x0 = COPY %2(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name:            freeze_fconstant
+body:             |
+  bb.1:
+    liveins: $w0
+
+    ; CHECK-LABEL: name: freeze_fconstant
+    ; CHECK: liveins: $w0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 9.000000e+00
+    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[C]]
+    ; CHECK-NEXT: $x0 = COPY [[FREEZE]](s64)
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
+    %0:_(s64) = COPY $x0
+    %1:_(s64) = G_FCONSTANT double 9.0
+    %2:_(s64) = G_FREEZE %1
+    $x0 = COPY %2(s64)
+    RET_ReallyLR implicit $x0
+...
+---
+name:            freeze_undef
+body:             |
+  bb.1:
+    liveins: $w0
+
+    ; CHECK-LABEL: name: freeze_undef
+    ; CHECK: liveins: $w0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[DEF]]
+    ; CHECK-NEXT: $x0 = COPY [[FREEZE]](s64)
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
+    %0:_(s64) = COPY $x0
+    %1:_(s64) = G_IMPLICIT_DEF
+    %2:_(s64) = G_FREEZE %1
+    $x0 = COPY %2(s64)
+    RET_ReallyLR implicit $x0
+...
+---
+name:            freeze_freeze
+body:             |
+  bb.1:
+    liveins: $w0
+
+    ; CHECK-LABEL: name: freeze_freeze
+    ; CHECK: liveins: $w0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[COPY]]
+    ; CHECK-NEXT: $x0 = COPY [[FREEZE]](s64)
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
+    %0:_(s64) = COPY $x0
+    %1:_(s64) = G_FREEZE %0
+    %2:_(s64) = G_FREEZE %1
+    $x0 = COPY %2(s64)
+    RET_ReallyLR implicit $x0
+...
+---
+name:            freeze_buildvector
+body:             |
+  bb.1:
+    liveins: $w0
+
+    ; CHECK-LABEL: name: freeze_buildvector
+    ; CHECK: liveins: $w0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32)
+    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<4 x s32>) = G_FREEZE [[BUILD_VECTOR]]
+    ; CHECK-NEXT: $q0 = COPY [[FREEZE]](<4 x s32>)
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
+    %0:_(s32) = COPY $w0
+    %1:_(<4 x s32>) = G_BUILD_VECTOR %0(s32), %0(s32), %0(s32), %0(s32)
+    %2:_(<4 x s32>) = G_FREEZE %1
+    $q0 = COPY %2(<4 x s32>)
+    RET_ReallyLR implicit $q0
+...
+---
+name:            freeze_buildvector_const
+body:             |
+  bb.1:
+    liveins: $w0
+
+    ; CHECK-LABEL: name: freeze_buildvector_const
+    ; CHECK: liveins: $w0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %c:_(s32) = G_CONSTANT i32 6
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR %c(s32), %c(s32), %c(s32), %c(s32)
+    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<4 x s32>) = G_FREEZE [[BUILD_VECTOR]]
+    ; CHECK-NEXT: $q0 = COPY [[FREEZE]](<4 x s32>)
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
+    %0:_(s32) = COPY $w0
+    %c:_(s32) = G_CONSTANT i32 6
+    %1:_(<4 x s32>) = G_BUILD_VECTOR %c(s32), %c(s32), %c(s32), %c(s32)
+    %2:_(<4 x s32>) = G_FREEZE %1
+    $q0 = COPY %2(<4 x s32>)
+    RET_ReallyLR implicit $q0
+...
+---
+name:            freeze_disjoint_or_fold_barrier
+body:             |
+  bb.1:
+    liveins: $w0
+
+    ; CHECK-LABEL: name: freeze_disjoint_or_fold_barrier
+    ; CHECK: liveins: $w0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %cst:_(s64) = G_CONSTANT i64 9
+    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE %cst
+    ; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER [[FREEZE]]
+    ; CHECK-NEXT: $x0 = COPY %c(s64)
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
+    %0:_(s64) = COPY $x0
+    %cst:_(s64) = G_CONSTANT i64 9
+    %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
+    %1:_(s64) = disjoint G_OR %c, %c
+    %2:_(s64) = G_FREEZE %1
+    $x0 = COPY %2(s64)
+    RET_ReallyLR implicit $x0
+...
+---
+name:            freeze_or_fold_barrier
+body:             |
+  bb.1:
+    liveins: $w0
+
+    ; CHECK-LABEL: name: freeze_or_fold_barrier
+    ; CHECK: liveins: $w0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %cst:_(s64) = G_CONSTANT i64 9
+    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE %cst
+    ; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER [[FREEZE]]
+    ; CHECK-NEXT: $x0 = COPY %c(s64)
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
+    %0:_(s64) = COPY $x0
+    %cst:_(s64) = G_CONSTANT i64 9
+    %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
+    %1:_(s64) = G_OR %c, %c
+    %2:_(s64) = G_FREEZE %1
+    $x0 = COPY %2(s64)
+    RET_ReallyLR implicit $x0
+...
+---
+name:            freeze_nneg_zext_fold_barrier
+body:             |
+  bb.1:
+    liveins: $w0
+
+    ; CHECK-LABEL: name: freeze_nneg_zext_fold_barrier
+    ; CHECK: liveins: $w0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %cst:_(s32) = G_CONSTANT i32 9
+    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s32) = G_FREEZE %cst
+    ; CHECK-NEXT: %c:_(s32) = G_CONSTANT_FOLD_BARRIER [[FREEZE]]
+    ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT %c(s32)
+    ; CHECK-NEXT: $x0 = COPY [[ZEXT]](s64)
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
+    %0:_(s64) = COPY $x0
+    %cst:_(s32) = G_CONSTANT i32 9
+    %c:_(s32) = G_CONSTANT_FOLD_BARRIER %cst
+    %1:_(s64) = nneg G_ZEXT %c
+    %2:_(s64) = G_FREEZE %1
+    $x0 = COPY %2(s64)
+    RET_ReallyLR implicit $x0
+...
+---
+name:            freeze_zext_fold_barrier
+body:             |
+  bb.1:
+    liveins: $w0
+
+    ; CHECK-LABEL: name: freeze_zext_fold_barrier
+    ; CHECK: liveins: $w0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %cst:_(s32) = G_CONSTANT i32 9
+    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s32) = G_FREEZE %cst
+    ; CHECK-NEXT: %c:_(s32) = G_CONSTANT_FOLD_BARRIER [[FREEZE]]
+    ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT %c(s32)
+    ; CHECK-NEXT: $x0 = COPY [[ZEXT]](s64)
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
+    %0:_(s64) = COPY $x0
+    %cst:_(s32) = G_CONSTANT i32 9
+    %c:_(s32) = G_CONSTANT_FOLD_BARRIER %cst
+    %1:_(s64) = G_ZEXT %c
+    %2:_(s64) = G_FREEZE %1
+    $x0 = COPY %2(s64)
+    RET_ReallyLR implicit $x0
+...
+---
+name:            freeze_udiv_fold_barrier
+body:             |
+  bb.1:
+    liveins: $w0
+
+    ; CHECK-LABEL: name: freeze_udiv_fold_barrier
+    ; CHECK: liveins: $w0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %cst:_(s64) = G_CONSTANT i64 9
+    ; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
+    ; CHECK-NEXT: [[UDIV:%[0-9]+]]:_(s64) = G_UDIV %c, %c
+    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[UDIV]]
+    ; CHECK-NEXT: $x0 = COPY [[FREEZE]](s64)
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
+    %0:_(s64) = COPY $x0
+    %cst:_(s64) = G_CONSTANT i64 9
+    %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
+    %1:_(s64) = G_UDIV %c, %c
+    %2:_(s64) = G_FREEZE %1
+    $x0 = COPY %2(s64)
+    RET_ReallyLR implicit $x0
+...
+---
+name:            freeze_exact_udiv_fold_barrier
+body:             |
+  bb.1:
+    liveins: $w0
+
+    ; CHECK-LABEL: name: freeze_exact_udiv_fold_barrier
+    ; CHECK: liveins: $w0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %cst:_(s64) = G_CONSTANT i64 9
+    ; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
+    ; CHECK-NEXT: [[UDIV:%[0-9]+]]:_(s64) = exact G_UDIV %c, %c
+    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[UDIV]]
+    ; CHECK-NEXT: $x0 = COPY [[FREEZE]](s64)
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
+    %0:_(s64) = COPY $x0
+    %cst:_(s64) = G_CONSTANT i64 9
+    %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
+    %1:_(s64) = exact G_UDIV %c, %c
+    %2:_(s64) = G_FREEZE %1
+    $x0 = COPY %2(s64)
+    RET_ReallyLR implicit $x0
+...
+---
+name:            freeze_mul_fold_barrier
+body:             |
+  bb.1:
+    liveins: $w0
+
+    ; CHECK-LABEL: name: freeze_mul_fold_barrier
+    ; CHECK: liveins: $w0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %cst:_(s64) = G_CONSTANT i64 9
+    ; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
+    ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL %c, %c
+    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[MUL]]
+    ; CHECK-NEXT: $x0 = COPY [[FREEZE]](s64)
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
+    %0:_(s64) = COPY $x0
+    %cst:_(s64) = G_CONSTANT i64 9
+    %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
+    %1:_(s64) = G_MUL %c, %c
+    %2:_(s64) = G_FREEZE %1
+    $x0 = COPY %2(s64)
+    RET_ReallyLR implicit $x0
+...
+---
+name:            freeze_nsw_mul_fold_barrier
+body:             |
+  bb.1:
+    liveins: $w0
+
+    ; CHECK-LABEL: name: freeze_nsw_mul_fold_barrier
+    ; CHECK: liveins: $w0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %cst:_(s64) = G_CONSTANT i64 9
+    ; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
+    ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = nsw G_MUL %c, %c
+    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[MUL]]
+    ; CHECK-NEXT: $x0 = COPY [[FREEZE]](s64)
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
+    %0:_(s64) = COPY $x0
+    %cst:_(s64) = G_CONSTANT i64 9
+    %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
+    %1:_(s64) = nsw G_MUL %c, %c
+    %2:_(s64) = G_FREEZE %1
+    $x0 = COPY %2(s64)
+    RET_ReallyLR implicit $x0
+...
+---
+name:            freeze_trunc_fold_barrier
+body:             |
+  bb.1:
+    liveins: $w0
+
+    ; CHECK-LABEL: name: freeze_trunc_fold_barrier
+    ; CHECK: liveins: $w0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %cst:_(s64) = G_CONSTANT i64 9
+    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE %cst
+    ; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER [[FREEZE]]
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %c(s64)
+    ; CHECK-NEXT: $w0 = COPY [[TRUNC]](s32)
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
+    %0:_(s64) = COPY $x0
+    %cst:_(s64) = G_CONSTANT i64 9
+    %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
+    %1:_(s32) = G_TRUNC %c
+    %2:_(s32) = G_FREEZE %1
+    $w0 = COPY %2(s32)
+    RET_ReallyLR implicit $q0
+...
+---
+name:            freeze_nuw_trunc_fold_barrier
+body:             |
+  bb.1:
+    liveins: $w0
+
+    ; CHECK-LABEL: name: freeze_nuw_trunc_fold_barrier
+    ; CHECK: liveins: $w0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %cst:_(s64) = G_CONSTANT i64 9
+    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE %cst
+    ; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER [[FREEZE]]
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %c(s64)
+    ; CHECK-NEXT: $w0 = COPY [[TRUNC]](s32)
+    ; CHECK-NEXT: RET_ReallyLR implicit $q0
+    %0:_(s64) = COPY $x0
+    %cst:_(s64) = G_CONSTANT i64 9
+    %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
+    %1:_(s32) = nuw G_TRUNC %c
+    %2:_(s32) = G_FREEZE %1
+    $w0 = COPY %2(s32)
+    RET_ReallyLR implicit $q0
+...
+---
+name:            freeze_add_fold_barrier
+body:             |
+  bb.1:
+    liveins: $w0
+
+    ; CHECK-LABEL: name: freeze_add_fold_barrier
+    ; CHECK: liveins: $w0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %cst:_(s64) = G_CONSTANT i64 9
+    ; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
+    ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD %c, %c
+    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[ADD]]
+    ; CHECK-NEXT: $x0 = COPY [[FREEZE]](s64)
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
+    %0:_(s64) = COPY $x0
+    %cst:_(s64) = G_CONSTANT i64 9
+    %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
+    %1:_(s64) = G_ADD %c, %c
+    %2:_(s64) = G_FREEZE %1
+    $x0 = COPY %2(s64)
+    RET_ReallyLR implicit $x0
+...
+---
+name:            freeze_nuw_add_fold_barrier
+body:             |
+  bb.1:
+    liveins: $w0
+
+    ; CHECK-LABEL: name: freeze_nuw_add_fold_barrier
+    ; CHECK: liveins: $w0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %cst:_(s64) = G_CONSTANT i64 9
+    ; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
+    ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = nuw G_ADD %c, %c
+    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[ADD]]
+    ; CHECK-NEXT: $x0 = COPY [[FREEZE]](s64)
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
+    %0:_(s64) = COPY $x0
+    %cst:_(s64) = G_CONSTANT i64 9
+    %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
+    %1:_(s64) = nuw G_ADD %c, %c
+    %2:_(s64) = G_FREEZE %1
+    $x0 = COPY %2(s64)
+    RET_ReallyLR implicit $x0
+...
+---
+name:            freeze_xor_fold_barrier
+body:             |
+  bb.1:
+    liveins: $w0
+
+    ; CHECK-LABEL: name: freeze_xor_fold_barrier
+    ; CHECK: liveins: $w0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %cst:_(s64) = G_CONSTANT i64 9
+    ; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
+    ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR %c, %c
+    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[XOR]]
+    ; CHECK-NEXT: $x0 = COPY [[FREEZE]](s64)
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
+    %0:_(s64) = COPY $x0
+    %cst:_(s64) = G_CONSTANT i64 9
+    %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
+    %1:_(s64) = G_XOR %c, %c
+    %2:_(s64) = G_FREEZE %1
+    $x0 = COPY %2(s64)
+    RET_ReallyLR implicit $x0
+...
+---
+name:            freeze_fptosi_fold_barrier
+body:             |
+  bb.1:
+    liveins: $w0
+
+    ; CHECK-LABEL: name: freeze_fptosi_fold_barrier
+    ; CHECK: liveins: $w0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %cst:_(s64) = G_CONSTANT i64 9
+    ; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
+    ; CHECK-NEXT: [[FPTOSI:%[0-9]+]]:_(s64) = G_FPTOSI %c(s64)
+    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[FPTOSI]]
+    ; CHECK-NEXT: $x0 = COPY [[FREEZE]](s64)
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
+    %0:_(s64) = COPY $x0
+    %cst:_(s64) = G_CONSTANT i64 9
+    %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
+    %1:_(s64) = G_FPTOSI %c
+    %2:_(s64) = G_FREEZE %1
+    $x0 = COPY %2(s64)
+    RET_ReallyLR implicit $x0
+...
+---
+name:            freeze_fptoui_fold_barrier
+body:             |
+  bb.1:
+    liveins: $w0
+
+    ; CHECK-LABEL: name: freeze_fptoui_fold_barrier
+    ; CHECK: liveins: $w0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %cst:_(s64) = G_CONSTANT i64 9
+    ; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
+    ; CHECK-NEXT: [[FPTOUI:%[0-9]+]]:_(s64) = G_FPTOUI %c(s64)
+    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[FPTOUI]]
+    ; CHECK-NEXT: $x0 = COPY [[FREEZE]](s64)
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
+    %0:_(s64) = COPY $x0
+    %cst:_(s64) = G_CONSTANT i64 9
+    %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
+    %1:_(s64) = G_FPTOUI %c
+    %2:_(s64) = G_FREEZE %1
+    $x0 = COPY %2(s64)
+    RET_ReallyLR implicit $x0
+...
+---
+name:            freeze_shl_fold_barrier
+body:             |
+  bb.1:
+    liveins: $w0
+
+    ; CHECK-LABEL: name: freeze_shl_fold_barrier
+    ; CHECK: liveins: $w0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %cst:_(s64) = G_CONSTANT i64 9
+    ; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL %c, %c(s64)
+    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[SHL]]
+    ; CHECK-NEXT: $x0 = COPY [[FREEZE]](s64)
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
+    %0:_(s64) = COPY $x0
+    %cst:_(s64) = G_CONSTANT i64 9
+    %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
+    %1:_(s64) = G_SHL %c, %c
+    %2:_(s64) = G_FREEZE %1
+    $x0 = COPY %2(s64)
+    RET_ReallyLR implicit $x0
+...
+---
+name:            freeze_ctlz_fold_barrier
+body:             |
+  bb.1:
+    liveins: $w0
+
+    ; CHECK-LABEL: name: freeze_ctlz_fold_barrier
+    ; CHECK: liveins: $w0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %cst:_(s64) = G_CONSTANT i64 9
+    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE %cst
+    ; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER [[FREEZE]]
+    ; CHECK-NEXT: [[CTLZ:%[0-9]+]]:_(s64) = G_CTLZ %c(s64)
+    ; CHECK-NEXT: $x0 = COPY [[CTLZ]](s64)
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
+    %0:_(s64) = COPY $x0
+    %cst:_(s64) = G_CONSTANT i64 9
+    %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
+    %1:_(s64) = G_CTLZ %c
+    %2:_(s64) = G_FREEZE %1
+    $x0 = COPY %2(s64)
+    RET_ReallyLR implicit $x0
+...
+---
+name:            freeze_cttz_fold_barrier
+body:             |
+  bb.1:
+    liveins: $w0
+
+    ; CHECK-LABEL: name: freeze_cttz_fold_barrier
+    ; CHECK: liveins: $w0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %cst:_(s64) = G_CONSTANT i64 9
+    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE %cst
+    ; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER [[FREEZE]]
+    ; CHECK-NEXT: [[CTTZ:%[0-9]+]]:_(s64) = G_CTTZ %c(s64)
+    ; CHECK-NEXT: $x0 = COPY [[CTTZ]](s64)
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
+    %0:_(s64) = COPY $x0
+    %cst:_(s64) = G_CONSTANT i64 9
+    %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
+    %1:_(s64) = G_CTTZ %c
+    %2:_(s64) = G_FREEZE %1
+    $x0 = COPY %2(s64)
+    RET_ReallyLR implicit $x0
+...
+---
+name:            freeze_bswap_fold_barrier
+body:             |
+  bb.1:
+    liveins: $w0
+
+    ; CHECK-LABEL: name: freeze_bswap_fold_barrier
+    ; CHECK: liveins: $w0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %cst:_(s64) = G_CONSTANT i64 9
+    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE %cst
+    ; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER [[FREEZE]]
+    ; CHECK-NEXT: [[BSWAP:%[0-9]+]]:_(s64) = G_BSWAP %c
+    ; CHECK-NEXT: $x0 = COPY [[BSWAP]](s64)
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
+    %0:_(s64) = COPY $x0
+    %cst:_(s64) = G_CONSTANT i64 9
+    %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
+    %1:_(s64) = G_BSWAP %c
+    %2:_(s64) = G_FREEZE %1
+    $x0 = COPY %2(s64)
+    RET_ReallyLR implicit $x0
+...
+---
+name:            freeze_icmp_fold_barrier
+body:             |
+  bb.1:
+    liveins: $w0
+
+    ; CHECK-LABEL: name: freeze_icmp_fold_barrier
+    ; CHECK: liveins: $w0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %cst:_(s64) = G_CONSTANT i64 9
+    ; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
+    ; CHECK-NEXT: %d:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
+    ; CHECK-NEXT: %cmp:_(s1) = G_ICMP intpred(eq), %c(s64), %d
+    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s1) = G_FREEZE %cmp
+    ; CHECK-NEXT: %ext:_(s64) = G_ZEXT [[FREEZE]](s1)
+    ; CHECK-NEXT: $x0 = COPY %ext(s64)
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
+    %0:_(s64) = COPY $x0
+    %cst:_(s64) = G_CONSTANT i64 9
+    %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
+    %d:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
+    %cmp:_(s1) = G_ICMP intpred(eq), %c(s64), %d
+    %2:_(s1) = G_FREEZE %cmp
+    %ext:_(s64) = G_ZEXT %2(s1)
+    $x0 = COPY %ext(s64)
+    RET_ReallyLR implicit $x0
+...
+---
+name:            freeze_fcmp_fold_barrier
+body:             |
+  bb.1:
+    liveins: $w0
+
+    ; CHECK-LABEL: name: freeze_fcmp_fold_barrier
+    ; CHECK: liveins: $w0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %cst:_(s64) = G_CONSTANT i64 9
+    ; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
+    ; CHECK-NEXT: %d:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
+    ; CHECK-NEXT: %cmp:_(s1) = G_FCMP floatpred(oeq), %c(s64), %d
+    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s1) = G_FREEZE %cmp
+    ; CHECK-NEXT: %ext:_(s64) = G_ZEXT [[FREEZE]](s1)
+    ; CHECK-NEXT: $x0 = COPY %ext(s64)
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
+    %0:_(s64) = COPY $x0
+    %cst:_(s64) = G_CONSTANT i64 9
+    %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
+    %d:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
+    %cmp:_(s1) = G_FCMP floatpred(oeq), %c(s64), %d
+    %2:_(s1) = G_FREEZE %cmp
+    %ext:_(s64) = G_ZEXT %2(s1)
+    $x0 = COPY %ext(s64)
+    RET_ReallyLR implicit $x0
+...
+---
+name:            freeze_zext_fcmp_fold_barrier
+body:             |
+  bb.1:
+    liveins: $w0
+
+    ; CHECK-LABEL: name: freeze_zext_fcmp_fold_barrier
+    ; CHECK: liveins: $w0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %cst:_(s64) = G_CONSTANT i64 9
+    ; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
+    ; CHECK-NEXT: %d:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
+    ; CHECK-NEXT: %cmp:_(s1) = G_FCMP floatpred(oeq), %c(s64), %d
+    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s1) = G_FREEZE %cmp
+    ; CHECK-NEXT: %ext:_(s64) = G_ZEXT [[FREEZE]](s1)
+    ; CHECK-NEXT: $x0 = COPY %ext(s64)
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
+    %0:_(s64) = COPY $x0
+    %cst:_(s64) = G_CONSTANT i64 9
+    %c:_(s64) = G_CONSTANT_FOLD_BARRIER  %cst
+    %d:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
+    %cmp:_(s1) = G_FCMP floatpred(oeq), %c(s64), %d
+    %ext:_(s64) = G_ZEXT %cmp(s1)
+    %2:_(s64) = G_FREEZE %ext
+    $x0 = COPY %2(s64)
+    RET_ReallyLR implicit $x0

>From 77e8d53a45b1fdefa0c3f4d12d47c68bb4bb9856 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= <schuett at gmail.com>
Date: Thu, 23 May 2024 23:10:53 +0200
Subject: [PATCH 3/6] more

---
 .../include/llvm/Target/GlobalISel/Combine.td |   2 +-
 .../lib/CodeGen/GlobalISel/CombinerHelper.cpp |   5 +-
 llvm/lib/CodeGen/GlobalISel/Utils.cpp         |   2 +-
 .../GlobalISel/combine-extract-vec-elt.mir    |   4 +-
 .../AArch64/GlobalISel/combine-freeze.mir     |  90 +---
 .../GlobalISel/combine-insert-vec-elt.mir     |   6 +-
 ...galizer-combiner-divrem-insertpt-crash.mir |   3 +-
 llvm/test/CodeGen/AArch64/fast-isel-select.ll | 454 ++++++++++++----
 llvm/test/CodeGen/AMDGPU/div_i128.ll          | 490 ++++++++----------
 9 files changed, 592 insertions(+), 464 deletions(-)

diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 40a6d69f7e372..47f73daf20891 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1636,7 +1636,7 @@ extract_vector_element_build_vector_trunc5,
 extract_vector_element_build_vector_trunc6,
 extract_vector_element_build_vector_trunc7,
 extract_vector_element_build_vector_trunc8,
-extract_vector_element_freeze,
+//extract_vector_element_freeze,
 extract_vector_element_shuffle_vector,
 insert_vector_element_extract_vector_element
 ]>;
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 4cc602b5c8709..abecee4259030 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -265,11 +265,12 @@ bool CombinerHelper::matchFreezeOfSingleMaybePoisonOperand(
     }
   }
 
-  cast<GenericMachineInstr>(OrigDef)->dropPoisonGeneratingFlags();
+  // FIXME: The observer must be made aware that the poison-generating flags are dropped.
+  //  cast<GenericMachineInstr>(OrigDef)->dropPoisonGeneratingFlags();
 
   // Eliminate freeze if all operands are guaranteed non-poison.
   if (!MaybePoisonOperand) {
-    MatchInfo = [=](MachineIRBuilder &B) { MRI.replaceRegWith(DstOp, OrigOp); };
+    MatchInfo = [=](MachineIRBuilder &B) { B.buildCopy(DstOp, OrigOp); };
     return true;
   }
 
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index 93a76fde6ab27..e8438be94b3cd 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -1848,7 +1848,7 @@ static bool isGuaranteedNotToBeUndefOrPoison(Register Reg,
     return !includesUndef(Kind);
   case TargetOpcode::G_CONSTANT:
   case TargetOpcode::G_FCONSTANT:
-    return false;
+    return true;
   case TargetOpcode::G_BUILD_VECTOR: {
     GBuildVector *BV = cast<GBuildVector>(RegDef);
     unsigned NumSources = BV->getNumSources();
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir
index d5d33742148ad..70241e71aa593 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir
@@ -361,8 +361,8 @@ body:             |
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: %vec:_(<2 x s64>) = COPY $q0
     ; CHECK-NEXT: %idx:_(s64) = COPY $x1
-    ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT %vec(<2 x s64>), %idx(s64)
-    ; CHECK-NEXT: %extract:_(s64) = G_FREEZE [[EVEC]]
+    ; CHECK-NEXT: %fvec:_(<2 x s64>) = G_FREEZE %vec
+    ; CHECK-NEXT: %extract:_(s64) = G_EXTRACT_VECTOR_ELT %fvec(<2 x s64>), %idx(s64)
     ; CHECK-NEXT: $x0 = COPY %extract(s64)
     ; CHECK-NEXT: RET_ReallyLR implicit $x0
     %vec:_(<2 x s64>) = COPY $q0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-freeze.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-freeze.mir
index 10df96bc73ed7..dfa45d96fe94c 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-freeze.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-freeze.mir
@@ -31,8 +31,7 @@ body:             |
     ; CHECK: liveins: $w0
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 9
-    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[C]]
-    ; CHECK-NEXT: $x0 = COPY [[FREEZE]](s64)
+    ; CHECK-NEXT: $x0 = COPY [[C]](s64)
     ; CHECK-NEXT: RET_ReallyLR implicit $x0
     %0:_(s64) = COPY $x0
     %1:_(s64) = G_CONSTANT i64 9
@@ -51,8 +50,7 @@ body:             |
     ; CHECK: liveins: $w0
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 9.000000e+00
-    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[C]]
-    ; CHECK-NEXT: $x0 = COPY [[FREEZE]](s64)
+    ; CHECK-NEXT: $x0 = COPY [[C]](s64)
     ; CHECK-NEXT: RET_ReallyLR implicit $x0
     %0:_(s64) = COPY $x0
     %1:_(s64) = G_FCONSTANT double 9.0
@@ -129,8 +127,7 @@ body:             |
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: %c:_(s32) = G_CONSTANT i32 6
     ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR %c(s32), %c(s32), %c(s32), %c(s32)
-    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<4 x s32>) = G_FREEZE [[BUILD_VECTOR]]
-    ; CHECK-NEXT: $q0 = COPY [[FREEZE]](<4 x s32>)
+    ; CHECK-NEXT: $q0 = COPY [[BUILD_VECTOR]](<4 x s32>)
     ; CHECK-NEXT: RET_ReallyLR implicit $q0
     %0:_(s32) = COPY $w0
     %c:_(s32) = G_CONSTANT i32 6
@@ -149,8 +146,7 @@ body:             |
     ; CHECK: liveins: $w0
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: %cst:_(s64) = G_CONSTANT i64 9
-    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE %cst
-    ; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER [[FREEZE]]
+    ; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
     ; CHECK-NEXT: $x0 = COPY %c(s64)
     ; CHECK-NEXT: RET_ReallyLR implicit $x0
     %0:_(s64) = COPY $x0
@@ -171,8 +167,7 @@ body:             |
     ; CHECK: liveins: $w0
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: %cst:_(s64) = G_CONSTANT i64 9
-    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE %cst
-    ; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER [[FREEZE]]
+    ; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
     ; CHECK-NEXT: $x0 = COPY %c(s64)
     ; CHECK-NEXT: RET_ReallyLR implicit $x0
     %0:_(s64) = COPY $x0
@@ -193,10 +188,9 @@ body:             |
     ; CHECK: liveins: $w0
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: %cst:_(s32) = G_CONSTANT i32 9
-    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s32) = G_FREEZE %cst
-    ; CHECK-NEXT: %c:_(s32) = G_CONSTANT_FOLD_BARRIER [[FREEZE]]
-    ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT %c(s32)
-    ; CHECK-NEXT: $x0 = COPY [[ZEXT]](s64)
+    ; CHECK-NEXT: %c:_(s32) = G_CONSTANT_FOLD_BARRIER %cst
+    ; CHECK-NEXT: %3:_(s64) = nneg G_ZEXT %c(s32)
+    ; CHECK-NEXT: $x0 = COPY %3(s64)
     ; CHECK-NEXT: RET_ReallyLR implicit $x0
     %0:_(s64) = COPY $x0
     %cst:_(s32) = G_CONSTANT i32 9
@@ -216,8 +210,7 @@ body:             |
     ; CHECK: liveins: $w0
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: %cst:_(s32) = G_CONSTANT i32 9
-    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s32) = G_FREEZE %cst
-    ; CHECK-NEXT: %c:_(s32) = G_CONSTANT_FOLD_BARRIER [[FREEZE]]
+    ; CHECK-NEXT: %c:_(s32) = G_CONSTANT_FOLD_BARRIER %cst
     ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT %c(s32)
     ; CHECK-NEXT: $x0 = COPY [[ZEXT]](s64)
     ; CHECK-NEXT: RET_ReallyLR implicit $x0
@@ -241,8 +234,7 @@ body:             |
     ; CHECK-NEXT: %cst:_(s64) = G_CONSTANT i64 9
     ; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
     ; CHECK-NEXT: [[UDIV:%[0-9]+]]:_(s64) = G_UDIV %c, %c
-    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[UDIV]]
-    ; CHECK-NEXT: $x0 = COPY [[FREEZE]](s64)
+    ; CHECK-NEXT: $x0 = COPY [[UDIV]](s64)
     ; CHECK-NEXT: RET_ReallyLR implicit $x0
     %0:_(s64) = COPY $x0
     %cst:_(s64) = G_CONSTANT i64 9
@@ -264,8 +256,7 @@ body:             |
     ; CHECK-NEXT: %cst:_(s64) = G_CONSTANT i64 9
     ; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
     ; CHECK-NEXT: [[UDIV:%[0-9]+]]:_(s64) = exact G_UDIV %c, %c
-    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[UDIV]]
-    ; CHECK-NEXT: $x0 = COPY [[FREEZE]](s64)
+    ; CHECK-NEXT: $x0 = COPY [[UDIV]](s64)
     ; CHECK-NEXT: RET_ReallyLR implicit $x0
     %0:_(s64) = COPY $x0
     %cst:_(s64) = G_CONSTANT i64 9
@@ -287,8 +278,7 @@ body:             |
     ; CHECK-NEXT: %cst:_(s64) = G_CONSTANT i64 9
     ; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
     ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL %c, %c
-    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[MUL]]
-    ; CHECK-NEXT: $x0 = COPY [[FREEZE]](s64)
+    ; CHECK-NEXT: $x0 = COPY [[MUL]](s64)
     ; CHECK-NEXT: RET_ReallyLR implicit $x0
     %0:_(s64) = COPY $x0
     %cst:_(s64) = G_CONSTANT i64 9
@@ -310,8 +300,7 @@ body:             |
     ; CHECK-NEXT: %cst:_(s64) = G_CONSTANT i64 9
     ; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
     ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = nsw G_MUL %c, %c
-    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[MUL]]
-    ; CHECK-NEXT: $x0 = COPY [[FREEZE]](s64)
+    ; CHECK-NEXT: $x0 = COPY [[MUL]](s64)
     ; CHECK-NEXT: RET_ReallyLR implicit $x0
     %0:_(s64) = COPY $x0
     %cst:_(s64) = G_CONSTANT i64 9
@@ -331,8 +320,7 @@ body:             |
     ; CHECK: liveins: $w0
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: %cst:_(s64) = G_CONSTANT i64 9
-    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE %cst
-    ; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER [[FREEZE]]
+    ; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
     ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %c(s64)
     ; CHECK-NEXT: $w0 = COPY [[TRUNC]](s32)
     ; CHECK-NEXT: RET_ReallyLR implicit $q0
@@ -354,9 +342,8 @@ body:             |
     ; CHECK: liveins: $w0
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: %cst:_(s64) = G_CONSTANT i64 9
-    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE %cst
-    ; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER [[FREEZE]]
-    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %c(s64)
+    ; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = nuw G_TRUNC %c(s64)
     ; CHECK-NEXT: $w0 = COPY [[TRUNC]](s32)
     ; CHECK-NEXT: RET_ReallyLR implicit $q0
     %0:_(s64) = COPY $x0
@@ -379,8 +366,7 @@ body:             |
     ; CHECK-NEXT: %cst:_(s64) = G_CONSTANT i64 9
     ; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
     ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD %c, %c
-    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[ADD]]
-    ; CHECK-NEXT: $x0 = COPY [[FREEZE]](s64)
+    ; CHECK-NEXT: $x0 = COPY [[ADD]](s64)
     ; CHECK-NEXT: RET_ReallyLR implicit $x0
     %0:_(s64) = COPY $x0
     %cst:_(s64) = G_CONSTANT i64 9
@@ -402,8 +388,7 @@ body:             |
     ; CHECK-NEXT: %cst:_(s64) = G_CONSTANT i64 9
     ; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
     ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = nuw G_ADD %c, %c
-    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[ADD]]
-    ; CHECK-NEXT: $x0 = COPY [[FREEZE]](s64)
+    ; CHECK-NEXT: $x0 = COPY [[ADD]](s64)
     ; CHECK-NEXT: RET_ReallyLR implicit $x0
     %0:_(s64) = COPY $x0
     %cst:_(s64) = G_CONSTANT i64 9
@@ -425,8 +410,7 @@ body:             |
     ; CHECK-NEXT: %cst:_(s64) = G_CONSTANT i64 9
     ; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
     ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR %c, %c
-    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[XOR]]
-    ; CHECK-NEXT: $x0 = COPY [[FREEZE]](s64)
+    ; CHECK-NEXT: $x0 = COPY [[XOR]](s64)
     ; CHECK-NEXT: RET_ReallyLR implicit $x0
     %0:_(s64) = COPY $x0
     %cst:_(s64) = G_CONSTANT i64 9
@@ -515,8 +499,7 @@ body:             |
     ; CHECK: liveins: $w0
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: %cst:_(s64) = G_CONSTANT i64 9
-    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE %cst
-    ; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER [[FREEZE]]
+    ; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
     ; CHECK-NEXT: [[CTLZ:%[0-9]+]]:_(s64) = G_CTLZ %c(s64)
     ; CHECK-NEXT: $x0 = COPY [[CTLZ]](s64)
     ; CHECK-NEXT: RET_ReallyLR implicit $x0
@@ -538,8 +521,7 @@ body:             |
     ; CHECK: liveins: $w0
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: %cst:_(s64) = G_CONSTANT i64 9
-    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE %cst
-    ; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER [[FREEZE]]
+    ; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
     ; CHECK-NEXT: [[CTTZ:%[0-9]+]]:_(s64) = G_CTTZ %c(s64)
     ; CHECK-NEXT: $x0 = COPY [[CTTZ]](s64)
     ; CHECK-NEXT: RET_ReallyLR implicit $x0
@@ -561,8 +543,7 @@ body:             |
     ; CHECK: liveins: $w0
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: %cst:_(s64) = G_CONSTANT i64 9
-    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE %cst
-    ; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER [[FREEZE]]
+    ; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
     ; CHECK-NEXT: [[BSWAP:%[0-9]+]]:_(s64) = G_BSWAP %c
     ; CHECK-NEXT: $x0 = COPY [[BSWAP]](s64)
     ; CHECK-NEXT: RET_ReallyLR implicit $x0
@@ -627,30 +608,3 @@ body:             |
     %ext:_(s64) = G_ZEXT %2(s1)
     $x0 = COPY %ext(s64)
     RET_ReallyLR implicit $x0
-...
----
-name:            freeze_zext_fcmp_fold_barrier
-body:             |
-  bb.1:
-    liveins: $w0
-
-    ; CHECK-LABEL: name: freeze_zext_fcmp_fold_barrier
-    ; CHECK: liveins: $w0
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: %cst:_(s64) = G_CONSTANT i64 9
-    ; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
-    ; CHECK-NEXT: %d:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
-    ; CHECK-NEXT: %cmp:_(s1) = G_FCMP floatpred(oeq), %c(s64), %d
-    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s1) = G_FREEZE %cmp
-    ; CHECK-NEXT: %ext:_(s64) = G_ZEXT [[FREEZE]](s1)
-    ; CHECK-NEXT: $x0 = COPY %ext(s64)
-    ; CHECK-NEXT: RET_ReallyLR implicit $x0
-    %0:_(s64) = COPY $x0
-    %cst:_(s64) = G_CONSTANT i64 9
-    %c:_(s64) = G_CONSTANT_FOLD_BARRIER  %cst
-    %d:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
-    %cmp:_(s1) = G_FCMP floatpred(oeq), %c(s64), %d
-    %ext:_(s64) = G_ZEXT %cmp(s1)
-    %2:_(s64) = G_FREEZE %ext
-    $x0 = COPY %2(s64)
-    RET_ReallyLR implicit $x0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-insert-vec-elt.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-insert-vec-elt.mir
index 0c67a867580cc..c000a8e635bc6 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-insert-vec-elt.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-insert-vec-elt.mir
@@ -253,10 +253,10 @@ body:             |
     ; CHECK: liveins: $x0
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 127
-    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s8>) = G_BUILD_VECTOR [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8)
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
-    ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<32 x s8>) = G_FREEZE [[BUILD_VECTOR]]
-    ; CHECK-NEXT: G_STORE [[FREEZE]](<32 x s8>), [[COPY]](p0) :: (store (<32 x s8>))
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s8>) = G_BUILD_VECTOR [[C]](s8), [[C]](s8), [[C]](s8), [[DEF]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8), [[C]](s8)
+    ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s8>), [[COPY]](p0) :: (store (<32 x s8>))
     ; CHECK-NEXT: RET_ReallyLR
     %3:_(s8) = G_CONSTANT i8 127
     %2:_(<32 x s8>) = G_BUILD_VECTOR %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8), %3(s8)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-divrem-insertpt-crash.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-divrem-insertpt-crash.mir
index ca403f8515611..767ece62b8731 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-divrem-insertpt-crash.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-divrem-insertpt-crash.mir
@@ -24,8 +24,7 @@ body:             |
   ; CHECK-NEXT:   successors: %bb.2(0x80000000)
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1
-  ; CHECK-NEXT:   [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[C1]]
-  ; CHECK-NEXT:   [[UDIV:%[0-9]+]]:_(s64) = G_UDIV [[FREEZE]], [[C]]
+  ; CHECK-NEXT:   [[UDIV:%[0-9]+]]:_(s64) = G_UDIV [[C1]], [[C]]
   ; CHECK-NEXT:   G_STORE [[UDIV]](s64), [[COPY]](p0) :: (store (s64))
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2:
diff --git a/llvm/test/CodeGen/AArch64/fast-isel-select.ll b/llvm/test/CodeGen/AArch64/fast-isel-select.ll
index 6ad4a5ae572e0..9184066fc8107 100644
--- a/llvm/test/CodeGen/AArch64/fast-isel-select.ll
+++ b/llvm/test/CodeGen/AArch64/fast-isel-select.ll
@@ -1,175 +1,288 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc -mtriple=aarch64-apple-darwin                             -verify-machineinstrs < %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort=1 -verify-machineinstrs < %s | FileCheck %s
 ; RUN: llc -mtriple=aarch64-apple-darwin -global-isel -verify-machineinstrs < %s | FileCheck %s --check-prefix=GISEL
 
 ; First test the different supported value types for select.
 define zeroext i1 @select_i1(i1 zeroext %c, i1 zeroext %a, i1 zeroext %b) {
-; CHECK-LABEL: select_i1
-; CHECK:       {{cmp w0, #0|tst w0, #0x1}}
-; CHECK-NEXT:  csel {{w[0-9]+}}, w1, w2, ne
+; GISEL-LABEL: select_i1:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    tst w0, #0x1
+; GISEL-NEXT:    csel w0, w1, w2, ne
+; GISEL-NEXT:    ret
   %1 = select i1 %c, i1 %a, i1 %b
   ret i1 %1
 }
 
 define zeroext i8 @select_i8(i1 zeroext %c, i8 zeroext %a, i8 zeroext %b) {
-; CHECK-LABEL: select_i8
-; CHECK:       {{cmp w0, #0|tst w0, #0x1}}
-; CHECK-NEXT:  csel {{w[0-9]+}}, w1, w2, ne
+; GISEL-LABEL: select_i8:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    tst w0, #0x1
+; GISEL-NEXT:    csel w0, w1, w2, ne
+; GISEL-NEXT:    ret
   %1 = select i1 %c, i8 %a, i8 %b
   ret i8 %1
 }
 
 define zeroext i16 @select_i16(i1 zeroext %c, i16 zeroext %a, i16 zeroext %b) {
-; CHECK-LABEL: select_i16
-; CHECK:       {{cmp w0, #0|tst w0, #0x1}}
-; CHECK-NEXT:  csel {{w[0-9]+}}, w1, w2, ne
+; GISEL-LABEL: select_i16:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    tst w0, #0x1
+; GISEL-NEXT:    csel w0, w1, w2, ne
+; GISEL-NEXT:    ret
   %1 = select i1 %c, i16 %a, i16 %b
   ret i16 %1
 }
 
 define i32 @select_i32(i1 zeroext %c, i32 %a, i32 %b) {
-; CHECK-LABEL: select_i32
-; CHECK:       {{cmp w0, #0|tst w0, #0x1}}
-; CHECK-NEXT:  csel {{w[0-9]+}}, w1, w2, ne
+; GISEL-LABEL: select_i32:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    tst w0, #0x1
+; GISEL-NEXT:    csel w0, w1, w2, ne
+; GISEL-NEXT:    ret
   %1 = select i1 %c, i32 %a, i32 %b
   ret i32 %1
 }
 
 define i64 @select_i64(i1 zeroext %c, i64 %a, i64 %b) {
-; CHECK-LABEL: select_i64
-; CHECK:       {{cmp w0, #0|tst w0, #0x1}}
-; CHECK-NEXT:  csel {{x[0-9]+}}, x1, x2, ne
+; GISEL-LABEL: select_i64:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    tst w0, #0x1
+; GISEL-NEXT:    csel x0, x1, x2, ne
+; GISEL-NEXT:    ret
   %1 = select i1 %c, i64 %a, i64 %b
   ret i64 %1
 }
 
 define float @select_f32(i1 zeroext %c, float %a, float %b) {
-; CHECK-LABEL: select_f32
-; CHECK:       {{cmp w0, #0|tst w0, #0x1}}
-; CHECK-NEXT:  fcsel {{s[0-9]+}}, s0, s1, ne
-; GISEL-LABEL: select_f32
-; GISEL:       {{cmp w0, #0|tst w0, #0x1}}
-; GISEL-NEXT:  fcsel {{s[0-9]+}}, s0, s1, ne
+; GISEL-LABEL: select_f32:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    tst w0, #0x1
+; GISEL-NEXT:    fcsel s0, s0, s1, ne
+; GISEL-NEXT:    ret
   %1 = select i1 %c, float %a, float %b
   ret float %1
 }
 
 define double @select_f64(i1 zeroext %c, double %a, double %b) {
-; CHECK-LABEL: select_f64
-; CHECK:       {{cmp w0, #0|tst w0, #0x1}}
-; CHECK-NEXT:  fcsel {{d[0-9]+}}, d0, d1, ne
-; GISEL-LABEL: select_f64
-; GISEL:       {{cmp w0, #0|tst w0, #0x1}}
-; GISEL-NEXT:  fcsel {{d[0-9]+}}, d0, d1, ne
+; GISEL-LABEL: select_f64:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    tst w0, #0x1
+; GISEL-NEXT:    fcsel d0, d0, d1, ne
+; GISEL-NEXT:    ret
   %1 = select i1 %c, double %a, double %b
   ret double %1
 }
 
 ; Now test the folding of all compares.
 define float @select_fcmp_false(float %x, float %a, float %b) {
-; CHECK-LABEL: select_fcmp_false
-; CHECK:       fmov {{s[0-9]+}}, s2
+; CHECK-LABEL: select_fcmp_false:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    fmov s0, s2
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: select_fcmp_false:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    fcmp s0, s0
+; GISEL-NEXT:    fcsel s0, s1, s2, gt
+; GISEL-NEXT:    ret
   %1 = fcmp ogt float %x, %x
   %2 = select i1 %1, float %a, float %b
   ret float %2
 }
 
 define float @select_fcmp_ogt(float %x, float %y, float %a, float %b) {
-; CHECK-LABEL: select_fcmp_ogt
-; CHECK:       fcmp s0, s1
-; CHECK-NEXT:  fcsel {{s[0-9]+}}, s2, s3, gt
+; CHECK-LABEL: select_fcmp_ogt:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    fcmp s0, s1
+; CHECK-NEXT:    fcsel s0, s2, s3, gt
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: select_fcmp_ogt:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    fcmp s0, s1
+; GISEL-NEXT:    fcsel s0, s2, s3, gt
+; GISEL-NEXT:    ret
   %1 = fcmp ogt float %x, %y
   %2 = select i1 %1, float %a, float %b
   ret float %2
 }
 
 define float @select_fcmp_oge(float %x, float %y, float %a, float %b) {
-; CHECK-LABEL: select_fcmp_oge
-; CHECK:       fcmp s0, s1
-; CHECK-NEXT:  fcsel {{s[0-9]+}}, s2, s3, ge
+; CHECK-LABEL: select_fcmp_oge:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    fcmp s0, s1
+; CHECK-NEXT:    fcsel s0, s2, s3, ge
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: select_fcmp_oge:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    fcmp s0, s1
+; GISEL-NEXT:    fcsel s0, s2, s3, ge
+; GISEL-NEXT:    ret
   %1 = fcmp oge float %x, %y
   %2 = select i1 %1, float %a, float %b
   ret float %2
 }
 
 define float @select_fcmp_olt(float %x, float %y, float %a, float %b) {
-; CHECK-LABEL: select_fcmp_olt
-; CHECK:       fcmp s0, s1
-; CHECK-NEXT:  fcsel {{s[0-9]+}}, s2, s3, mi
+; CHECK-LABEL: select_fcmp_olt:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    fcmp s0, s1
+; CHECK-NEXT:    fcsel s0, s2, s3, mi
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: select_fcmp_olt:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    fcmp s0, s1
+; GISEL-NEXT:    fcsel s0, s2, s3, mi
+; GISEL-NEXT:    ret
   %1 = fcmp olt float %x, %y
   %2 = select i1 %1, float %a, float %b
   ret float %2
 }
 
 define float @select_fcmp_ole(float %x, float %y, float %a, float %b) {
-; CHECK-LABEL: select_fcmp_ole
-; CHECK:       fcmp s0, s1
-; CHECK-NEXT:  fcsel {{s[0-9]+}}, s2, s3, ls
+; CHECK-LABEL: select_fcmp_ole:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    fcmp s0, s1
+; CHECK-NEXT:    fcsel s0, s2, s3, ls
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: select_fcmp_ole:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    fcmp s0, s1
+; GISEL-NEXT:    fcsel s0, s2, s3, ls
+; GISEL-NEXT:    ret
   %1 = fcmp ole float %x, %y
   %2 = select i1 %1, float %a, float %b
   ret float %2
 }
 
 define float @select_fcmp_one(float %x, float %y, float %a, float %b) {
-; CHECK-LABEL: select_fcmp_one
-; CHECK:       fcmp s0, s1
-; CHECK-NEXT:  fcsel [[REG:s[0-9]+]], s2, s3, mi
-; CHECK-NEXT:  fcsel {{s[0-9]+}}, s2, [[REG]], gt
+; CHECK-LABEL: select_fcmp_one:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    fcmp s0, s1
+; CHECK-NEXT:    fcsel s0, s2, s3, mi
+; CHECK-NEXT:    fcsel s0, s2, s0, gt
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: select_fcmp_one:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    fcmp s0, s1
+; GISEL-NEXT:    cset w8, mi
+; GISEL-NEXT:    cset w9, gt
+; GISEL-NEXT:    orr w8, w8, w9
+; GISEL-NEXT:    tst w8, #0x1
+; GISEL-NEXT:    fcsel s0, s2, s3, ne
+; GISEL-NEXT:    ret
   %1 = fcmp one float %x, %y
   %2 = select i1 %1, float %a, float %b
   ret float %2
 }
 
 define float @select_fcmp_ord(float %x, float %y, float %a, float %b) {
-; CHECK-LABEL: select_fcmp_ord
-; CHECK:       fcmp s0, s1
-; CHECK-NEXT:  fcsel {{s[0-9]+}}, s2, s3, vc
+; CHECK-LABEL: select_fcmp_ord:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    fcmp s0, s1
+; CHECK-NEXT:    fcsel s0, s2, s3, vc
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: select_fcmp_ord:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    fcmp s0, s1
+; GISEL-NEXT:    fcsel s0, s2, s3, vc
+; GISEL-NEXT:    ret
   %1 = fcmp ord float %x, %y
   %2 = select i1 %1, float %a, float %b
   ret float %2
 }
 
 define float @select_fcmp_uno(float %x, float %y, float %a, float %b) {
-; CHECK-LABEL: select_fcmp_uno
-; CHECK:       fcmp s0, s1
-; CHECK-NEXT:  fcsel {{s[0-9]+}}, s2, s3, vs
+; CHECK-LABEL: select_fcmp_uno:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    fcmp s0, s1
+; CHECK-NEXT:    fcsel s0, s2, s3, vs
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: select_fcmp_uno:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    fcmp s0, s1
+; GISEL-NEXT:    fcsel s0, s2, s3, vs
+; GISEL-NEXT:    ret
   %1 = fcmp uno float %x, %y
   %2 = select i1 %1, float %a, float %b
   ret float %2
 }
 
 define float @select_fcmp_ueq(float %x, float %y, float %a, float %b) {
-; CHECK-LABEL: select_fcmp_ueq
-; CHECK:       fcmp s0, s1
-; CHECK-NEXT:  fcsel [[REG:s[0-9]+]], s2, s3, eq
-; CHECK-NEXT:  fcsel {{s[0-9]+}}, s2, [[REG]], vs
+; CHECK-LABEL: select_fcmp_ueq:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    fcmp s0, s1
+; CHECK-NEXT:    fcsel s0, s2, s3, eq
+; CHECK-NEXT:    fcsel s0, s2, s0, vs
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: select_fcmp_ueq:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    fcmp s0, s1
+; GISEL-NEXT:    cset w8, eq
+; GISEL-NEXT:    cset w9, vs
+; GISEL-NEXT:    orr w8, w8, w9
+; GISEL-NEXT:    tst w8, #0x1
+; GISEL-NEXT:    fcsel s0, s2, s3, ne
+; GISEL-NEXT:    ret
   %1 = fcmp ueq float %x, %y
   %2 = select i1 %1, float %a, float %b
   ret float %2
 }
 
 define float @select_fcmp_ugt(float %x, float %y, float %a, float %b) {
-; CHECK-LABEL: select_fcmp_ugt
-; CHECK:       fcmp s0, s1
-; CHECK-NEXT:  fcsel {{s[0-9]+}}, s2, s3, hi
+; CHECK-LABEL: select_fcmp_ugt:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    fcmp s0, s1
+; CHECK-NEXT:    fcsel s0, s2, s3, hi
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: select_fcmp_ugt:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    fcmp s0, s1
+; GISEL-NEXT:    fcsel s0, s2, s3, hi
+; GISEL-NEXT:    ret
   %1 = fcmp ugt float %x, %y
   %2 = select i1 %1, float %a, float %b
   ret float %2
 }
 
 define float @select_fcmp_uge(float %x, float %y, float %a, float %b) {
-; CHECK-LABEL: select_fcmp_uge
-; CHECK:       fcmp s0, s1
-; CHECK-NEXT:  fcsel {{s[0-9]+}}, s2, s3, pl
+; CHECK-LABEL: select_fcmp_uge:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    fcmp s0, s1
+; CHECK-NEXT:    fcsel s0, s2, s3, pl
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: select_fcmp_uge:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    fcmp s0, s1
+; GISEL-NEXT:    fcsel s0, s2, s3, pl
+; GISEL-NEXT:    ret
   %1 = fcmp uge float %x, %y
   %2 = select i1 %1, float %a, float %b
   ret float %2
 }
 
 define float @select_fcmp_ult(float %x, float %y, float %a, float %b) {
-; CHECK-LABEL: select_fcmp_ult
-; CHECK:       fcmp s0, s1
-; CHECK-NEXT:  fcsel {{s[0-9]+}}, s2, s3, lt
+; CHECK-LABEL: select_fcmp_ult:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    fcmp s0, s1
+; CHECK-NEXT:    fcsel s0, s2, s3, lt
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: select_fcmp_ult:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    fcmp s0, s1
+; GISEL-NEXT:    fcsel s0, s2, s3, lt
+; GISEL-NEXT:    ret
   %1 = fcmp ult float %x, %y
   %2 = select i1 %1, float %a, float %b
   ret float %2
@@ -177,116 +290,224 @@ define float @select_fcmp_ult(float %x, float %y, float %a, float %b) {
 
 
 define float @select_fcmp_ule(float %x, float %y, float %a, float %b) {
-; CHECK-LABEL: select_fcmp_ule
-; CHECK:       fcmp s0, s1
-; CHECK-NEXT:  fcsel {{s[0-9]+}}, s2, s3, le
+; CHECK-LABEL: select_fcmp_ule:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    fcmp s0, s1
+; CHECK-NEXT:    fcsel s0, s2, s3, le
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: select_fcmp_ule:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    fcmp s0, s1
+; GISEL-NEXT:    fcsel s0, s2, s3, le
+; GISEL-NEXT:    ret
   %1 = fcmp ule float %x, %y
   %2 = select i1 %1, float %a, float %b
   ret float %2
 }
 
 define float @select_fcmp_une(float %x, float %y, float %a, float %b) {
-; CHECK-LABEL: select_fcmp_une
-; CHECK:       fcmp s0, s1
-; CHECK-NEXT:  fcsel {{s[0-9]+}}, s2, s3, ne
+; CHECK-LABEL: select_fcmp_une:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    fcmp s0, s1
+; CHECK-NEXT:    fcsel s0, s2, s3, ne
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: select_fcmp_une:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    fcmp s0, s1
+; GISEL-NEXT:    fcsel s0, s2, s3, ne
+; GISEL-NEXT:    ret
   %1 = fcmp une float %x, %y
   %2 = select i1 %1, float %a, float %b
   ret float %2
 }
 
 define float @select_fcmp_true(float %x, float %a, float %b) {
-; CHECK-LABEL: select_fcmp_true
-; CHECK:       fmov {{s[0-9]+}}, s1
+; CHECK-LABEL: select_fcmp_true:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    fmov s0, s1
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: select_fcmp_true:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    fcmp s0, s0
+; GISEL-NEXT:    cset w8, eq
+; GISEL-NEXT:    cset w9, vs
+; GISEL-NEXT:    orr w8, w8, w9
+; GISEL-NEXT:    tst w8, #0x1
+; GISEL-NEXT:    fcsel s0, s1, s2, ne
+; GISEL-NEXT:    ret
   %1 = fcmp ueq float %x, %x
   %2 = select i1 %1, float %a, float %b
   ret float %2
 }
 
 define float @select_icmp_eq(i32 %x, i32 %y, float %a, float %b) {
-; CHECK-LABEL: select_icmp_eq
-; CHECK:       cmp w0, w1
-; CHECK-NEXT:  fcsel {{s[0-9]+}}, s0, s1, eq
+; CHECK-LABEL: select_icmp_eq:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    cmp w0, w1
+; CHECK-NEXT:    fcsel s0, s0, s1, eq
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: select_icmp_eq:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    cmp w0, w1
+; GISEL-NEXT:    fcsel s0, s0, s1, eq
+; GISEL-NEXT:    ret
   %1 = icmp eq i32 %x, %y
   %2 = select i1 %1, float %a, float %b
   ret float %2
 }
 
 define float @select_icmp_ne(i32 %x, i32 %y, float %a, float %b) {
-; CHECK-LABEL: select_icmp_ne
-; CHECK:       cmp w0, w1
-; CHECK-NEXT:  fcsel {{s[0-9]+}}, s0, s1, ne
+; CHECK-LABEL: select_icmp_ne:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    cmp w0, w1
+; CHECK-NEXT:    fcsel s0, s0, s1, ne
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: select_icmp_ne:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    cmp w0, w1
+; GISEL-NEXT:    fcsel s0, s0, s1, ne
+; GISEL-NEXT:    ret
   %1 = icmp ne i32 %x, %y
   %2 = select i1 %1, float %a, float %b
   ret float %2
 }
 
 define float @select_icmp_ugt(i32 %x, i32 %y, float %a, float %b) {
-; CHECK-LABEL: select_icmp_ugt
-; CHECK:       cmp w0, w1
-; CHECK-NEXT:  fcsel {{s[0-9]+}}, s0, s1, hi
+; CHECK-LABEL: select_icmp_ugt:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    cmp w0, w1
+; CHECK-NEXT:    fcsel s0, s0, s1, hi
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: select_icmp_ugt:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    cmp w0, w1
+; GISEL-NEXT:    fcsel s0, s0, s1, hi
+; GISEL-NEXT:    ret
   %1 = icmp ugt i32 %x, %y
   %2 = select i1 %1, float %a, float %b
   ret float %2
 }
 
 define float @select_icmp_uge(i32 %x, i32 %y, float %a, float %b) {
-; CHECK-LABEL: select_icmp_uge
-; CHECK:       cmp w0, w1
-; CHECK-NEXT:  fcsel {{s[0-9]+}}, s0, s1, hs
+; CHECK-LABEL: select_icmp_uge:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    cmp w0, w1
+; CHECK-NEXT:    fcsel s0, s0, s1, hs
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: select_icmp_uge:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    cmp w0, w1
+; GISEL-NEXT:    fcsel s0, s0, s1, hs
+; GISEL-NEXT:    ret
   %1 = icmp uge i32 %x, %y
   %2 = select i1 %1, float %a, float %b
   ret float %2
 }
 
 define float @select_icmp_ult(i32 %x, i32 %y, float %a, float %b) {
-; CHECK-LABEL: select_icmp_ult
-; CHECK:       cmp w0, w1
-; CHECK-NEXT:  fcsel {{s[0-9]+}}, s0, s1, lo
+; CHECK-LABEL: select_icmp_ult:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    cmp w0, w1
+; CHECK-NEXT:    fcsel s0, s0, s1, lo
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: select_icmp_ult:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    cmp w0, w1
+; GISEL-NEXT:    fcsel s0, s0, s1, lo
+; GISEL-NEXT:    ret
   %1 = icmp ult i32 %x, %y
   %2 = select i1 %1, float %a, float %b
   ret float %2
 }
 
 define float @select_icmp_ule(i32 %x, i32 %y, float %a, float %b) {
-; CHECK-LABEL: select_icmp_ule
-; CHECK:       cmp w0, w1
-; CHECK-NEXT:  fcsel {{s[0-9]+}}, s0, s1, ls
+; CHECK-LABEL: select_icmp_ule:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    cmp w0, w1
+; CHECK-NEXT:    fcsel s0, s0, s1, ls
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: select_icmp_ule:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    cmp w0, w1
+; GISEL-NEXT:    fcsel s0, s0, s1, ls
+; GISEL-NEXT:    ret
   %1 = icmp ule i32 %x, %y
   %2 = select i1 %1, float %a, float %b
   ret float %2
 }
 
 define float @select_icmp_sgt(i32 %x, i32 %y, float %a, float %b) {
-; CHECK-LABEL: select_icmp_sgt
-; CHECK:       cmp w0, w1
-; CHECK-NEXT:  fcsel {{s[0-9]+}}, s0, s1, gt
+; CHECK-LABEL: select_icmp_sgt:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    cmp w0, w1
+; CHECK-NEXT:    fcsel s0, s0, s1, gt
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: select_icmp_sgt:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    cmp w0, w1
+; GISEL-NEXT:    fcsel s0, s0, s1, gt
+; GISEL-NEXT:    ret
   %1 = icmp sgt i32 %x, %y
   %2 = select i1 %1, float %a, float %b
   ret float %2
 }
 
 define float @select_icmp_sge(i32 %x, i32 %y, float %a, float %b) {
-; CHECK-LABEL: select_icmp_sge
-; CHECK:       cmp w0, w1
-; CHECK-NEXT:  fcsel {{s[0-9]+}}, s0, s1, ge
+; CHECK-LABEL: select_icmp_sge:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    cmp w0, w1
+; CHECK-NEXT:    fcsel s0, s0, s1, ge
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: select_icmp_sge:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    cmp w0, w1
+; GISEL-NEXT:    fcsel s0, s0, s1, ge
+; GISEL-NEXT:    ret
   %1 = icmp sge i32 %x, %y
   %2 = select i1 %1, float %a, float %b
   ret float %2
 }
 
 define float @select_icmp_slt(i32 %x, i32 %y, float %a, float %b) {
-; CHECK-LABEL: select_icmp_slt
-; CHECK:       cmp w0, w1
-; CHECK-NEXT:  fcsel {{s[0-9]+}}, s0, s1, lt
+; CHECK-LABEL: select_icmp_slt:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    cmp w0, w1
+; CHECK-NEXT:    fcsel s0, s0, s1, lt
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: select_icmp_slt:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    cmp w0, w1
+; GISEL-NEXT:    fcsel s0, s0, s1, lt
+; GISEL-NEXT:    ret
   %1 = icmp slt i32 %x, %y
   %2 = select i1 %1, float %a, float %b
   ret float %2
 }
 
 define float @select_icmp_sle(i32 %x, i32 %y, float %a, float %b) {
-; CHECK-LABEL: select_icmp_sle
-; CHECK:       cmp w0, w1
-; CHECK-NEXT:  fcsel {{s[0-9]+}}, s0, s1, le
+; CHECK-LABEL: select_icmp_sle:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    cmp w0, w1
+; CHECK-NEXT:    fcsel s0, s0, s1, le
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: select_icmp_sle:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    cmp w0, w1
+; GISEL-NEXT:    fcsel s0, s0, s1, le
+; GISEL-NEXT:    ret
   %1 = icmp sle i32 %x, %y
   %2 = select i1 %1, float %a, float %b
   ret float %2
@@ -294,30 +515,41 @@ define float @select_icmp_sle(i32 %x, i32 %y, float %a, float %b) {
 
 ; Test peephole optimizations for select.
 define zeroext i1 @select_opt1(i1 zeroext %c, i1 zeroext %a) {
-; CHECK-LABEL: select_opt1
-; CHECK:       orr {{w[0-9]+}}, w0, w1
+; GISEL-LABEL: select_opt1:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    orr w8, w0, w1
+; GISEL-NEXT:    and w0, w8, #0x1
+; GISEL-NEXT:    ret
   %1 = select i1 %c, i1 true, i1 %a
   ret i1 %1
 }
 
 define zeroext i1 @select_opt2(i1 zeroext %c, i1 zeroext %a) {
-; CHECK-LABEL: select_opt2
-; CHECK:       eor [[REG:w[0-9]+]], w0, #0x1
-; CHECK:       orr {{w[0-9]+}}, [[REG]], w1
+; GISEL-LABEL: select_opt2:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    eor w8, w0, #0x1
+; GISEL-NEXT:    orr w8, w8, w1
+; GISEL-NEXT:    and w0, w8, #0x1
+; GISEL-NEXT:    ret
   %1 = select i1 %c, i1 %a, i1 true
   ret i1 %1
 }
 
 define zeroext i1 @select_opt3(i1 zeroext %c, i1 zeroext %a) {
-; CHECK-LABEL: select_opt3
-; CHECK:       bic {{w[0-9]+}}, w1, w0
+; GISEL-LABEL: select_opt3:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    eor w8, w0, #0x1
+; GISEL-NEXT:    and w0, w8, w1
+; GISEL-NEXT:    ret
   %1 = select i1 %c, i1 false, i1 %a
   ret i1 %1
 }
 
 define zeroext i1 @select_opt4(i1 zeroext %c, i1 zeroext %a) {
-; CHECK-LABEL: select_opt4
-; CHECK:       and {{w[0-9]+}}, w0, w1
+; GISEL-LABEL: select_opt4:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    and w0, w0, w1
+; GISEL-NEXT:    ret
   %1 = select i1 %c, i1 %a, i1 false
   ret i1 %1
 }
diff --git a/llvm/test/CodeGen/AMDGPU/div_i128.ll b/llvm/test/CodeGen/AMDGPU/div_i128.ll
index b2f9bf89d9ec6..7d8eba1e87080 100644
--- a/llvm/test/CodeGen/AMDGPU/div_i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/div_i128.ll
@@ -1509,49 +1509,39 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, v2
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v3
 ; GFX9-G-O0-NEXT:    v_xor_b32_e64 v1, v12, v1
-; GFX9-G-O0-NEXT:    v_xor_b32_e64 v4, v12, v2
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, v5
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v6
+; GFX9-G-O0-NEXT:    v_xor_b32_e64 v2, v12, v2
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, v5
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, v6
+; GFX9-G-O0-NEXT:    v_xor_b32_e64 v4, v10, v4
 ; GFX9-G-O0-NEXT:    v_xor_b32_e64 v3, v10, v3
-; GFX9-G-O0-NEXT:    v_xor_b32_e64 v2, v10, v2
 ; GFX9-G-O0-NEXT:    v_sub_co_u32_e64 v1, s[6:7], v1, v12
-; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v7, s[6:7], v4, v12, s[6:7]
-; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v6, s[6:7], v3, v10, s[6:7]
-; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v5, s[6:7], v2, v10, s[6:7]
-; GFX9-G-O0-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2_vgpr3_vgpr4 killed $exec
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v7
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, v6
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, v5
-; GFX9-G-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-G-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, v13
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, v14
+; GFX9-G-O0-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v2, s[6:7], v2, v12, s[6:7]
+; GFX9-G-O0-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v6, s[6:7], v4, v10, s[6:7]
+; GFX9-G-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v3, s[6:7], v3, v10, s[6:7]
+; GFX9-G-O0-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, v13
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, v14
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, v15
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v14, v16
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, v6
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, v7
-; GFX9-G-O0-NEXT:    v_xor_b32_e64 v5, v11, v5
-; GFX9-G-O0-NEXT:    v_xor_b32_e64 v8, v11, v6
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, v7
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, v8
+; GFX9-G-O0-NEXT:    v_xor_b32_e64 v8, v11, v5
+; GFX9-G-O0-NEXT:    v_xor_b32_e64 v5, v11, v4
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, v13
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, v14
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, v14
 ; GFX9-G-O0-NEXT:    v_xor_b32_e64 v7, v9, v7
-; GFX9-G-O0-NEXT:    v_xor_b32_e64 v6, v9, v6
-; GFX9-G-O0-NEXT:    v_sub_co_u32_e64 v5, s[6:7], v5, v11
-; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v15, s[6:7], v8, v11, s[6:7]
-; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v14, s[6:7], v7, v9, s[6:7]
-; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v13, s[6:7], v6, v9, s[6:7]
-; GFX9-G-O0-NEXT:    ; kill: def $vgpr5 killed $vgpr5 def $vgpr5_vgpr6_vgpr7_vgpr8 killed $exec
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, v15
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, v14
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, v13
-; GFX9-G-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-G-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    v_xor_b32_e64 v4, v9, v4
+; GFX9-G-O0-NEXT:    v_sub_co_u32_e64 v8, s[6:7], v8, v11
 ; GFX9-G-O0-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v5, s[6:7], v5, v11, s[6:7]
+; GFX9-G-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v7, s[6:7], v7, v9, s[6:7]
+; GFX9-G-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v4, s[6:7], v4, v9, s[6:7]
+; GFX9-G-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
 ; GFX9-G-O0-NEXT:    v_xor_b32_e64 v13, v11, v12
 ; GFX9-G-O0-NEXT:    buffer_store_dword v13, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
 ; GFX9-G-O0-NEXT:    v_xor_b32_e64 v11, v11, v12
@@ -1560,97 +1550,69 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-G-O0-NEXT:    buffer_store_dword v11, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
 ; GFX9-G-O0-NEXT:    v_xor_b32_e64 v9, v9, v10
 ; GFX9-G-O0-NEXT:    buffer_store_dword v9, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, v6
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v5
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v14, v8
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, v7
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v10
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v11
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v12, v13
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, v14
-; GFX9-G-O0-NEXT:    v_or_b32_e64 v9, v9, v12
-; GFX9-G-O0-NEXT:    v_or_b32_e64 v11, v10, v11
+; GFX9-G-O0-NEXT:    v_or_b32_e64 v9, v8, v7
+; GFX9-G-O0-NEXT:    v_or_b32_e64 v11, v5, v4
 ; GFX9-G-O0-NEXT:    ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v11
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v12, s5
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, s4
 ; GFX9-G-O0-NEXT:    v_cmp_eq_u64_e64 s[6:7], v[9:10], v[11:12]
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, v2
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v1
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v14, v4
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, v3
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v10
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v11
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v12, v13
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, v14
-; GFX9-G-O0-NEXT:    v_or_b32_e64 v9, v9, v12
-; GFX9-G-O0-NEXT:    v_or_b32_e64 v11, v10, v11
+; GFX9-G-O0-NEXT:    v_or_b32_e64 v9, v1, v6
+; GFX9-G-O0-NEXT:    v_or_b32_e64 v11, v2, v3
 ; GFX9-G-O0-NEXT:    ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v11
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v12, s5
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, s4
 ; GFX9-G-O0-NEXT:    v_cmp_eq_u64_e64 s[8:9], v[9:10], v[11:12]
 ; GFX9-G-O0-NEXT:    s_or_b64 s[6:7], s[6:7], s[8:9]
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, v6
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v5
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v8
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, v7
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, s5
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, s4
-; GFX9-G-O0-NEXT:    v_cmp_eq_u64_e64 s[8:9], v[8:9], v[5:6]
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, v10
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, v11
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v7
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v4
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v12, s5
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, s4
+; GFX9-G-O0-NEXT:    v_cmp_eq_u64_e64 s[8:9], v[9:10], v[11:12]
 ; GFX9-G-O0-NEXT:    v_ffbh_u32_e64 v5, v5
-; GFX9-G-O0-NEXT:    v_ffbh_u32_e64 v6, v6
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, 32
-; GFX9-G-O0-NEXT:    v_add_u32_e64 v6, v6, v7
-; GFX9-G-O0-NEXT:    v_min_u32_e64 v5, v5, v6
+; GFX9-G-O0-NEXT:    v_ffbh_u32_e64 v8, v8
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, 32
+; GFX9-G-O0-NEXT:    v_add_u32_e64 v8, v8, v9
+; GFX9-G-O0-NEXT:    v_min_u32_e64 v5, v5, v8
 ; GFX9-G-O0-NEXT:    s_mov_b32 s10, 64
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, s10
-; GFX9-G-O0-NEXT:    v_add_u32_e64 v6, v5, v6
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, v8
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, v9
-; GFX9-G-O0-NEXT:    v_ffbh_u32_e64 v5, v5
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, s10
+; GFX9-G-O0-NEXT:    v_add_u32_e64 v5, v5, v8
+; GFX9-G-O0-NEXT:    v_ffbh_u32_e64 v4, v4
 ; GFX9-G-O0-NEXT:    v_ffbh_u32_e64 v7, v7
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, 32
 ; GFX9-G-O0-NEXT:    v_add_u32_e64 v7, v7, v8
-; GFX9-G-O0-NEXT:    v_min_u32_e64 v5, v5, v7
-; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v5, v5, v6, s[8:9]
+; GFX9-G-O0-NEXT:    v_min_u32_e64 v4, v4, v7
+; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v4, v4, v5, s[8:9]
 ; GFX9-G-O0-NEXT:    s_mov_b32 s16, 0
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v12, v2
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, v1
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v4
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v3
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, s5
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, s4
-; GFX9-G-O0-NEXT:    v_cmp_eq_u64_e64 s[8:9], v[9:10], v[6:7]
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, v11
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, v12
-; GFX9-G-O0-NEXT:    v_ffbh_u32_e64 v6, v6
-; GFX9-G-O0-NEXT:    v_ffbh_u32_e64 v7, v7
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, v6
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, v3
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, s5
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, s4
+; GFX9-G-O0-NEXT:    v_cmp_eq_u64_e64 s[8:9], v[7:8], v[9:10]
+; GFX9-G-O0-NEXT:    v_ffbh_u32_e64 v5, v2
+; GFX9-G-O0-NEXT:    v_ffbh_u32_e64 v7, v1
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, 32
 ; GFX9-G-O0-NEXT:    v_add_u32_e64 v7, v7, v8
-; GFX9-G-O0-NEXT:    v_min_u32_e64 v6, v6, v7
+; GFX9-G-O0-NEXT:    v_min_u32_e64 v5, v5, v7
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, s10
-; GFX9-G-O0-NEXT:    v_add_u32_e64 v7, v6, v7
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, v9
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, v10
-; GFX9-G-O0-NEXT:    v_ffbh_u32_e64 v6, v6
-; GFX9-G-O0-NEXT:    v_ffbh_u32_e64 v8, v8
+; GFX9-G-O0-NEXT:    v_add_u32_e64 v7, v5, v7
+; GFX9-G-O0-NEXT:    v_ffbh_u32_e64 v5, v3
+; GFX9-G-O0-NEXT:    v_ffbh_u32_e64 v8, v6
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, 32
 ; GFX9-G-O0-NEXT:    v_add_u32_e64 v8, v8, v9
-; GFX9-G-O0-NEXT:    v_min_u32_e64 v6, v6, v8
-; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v6, v6, v7, s[8:9]
+; GFX9-G-O0-NEXT:    v_min_u32_e64 v5, v5, v8
+; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v5, v5, v7, s[8:9]
 ; GFX9-G-O0-NEXT:    s_mov_b32 s15, 0
 ; GFX9-G-O0-NEXT:    s_mov_b32 s11, 0
 ; GFX9-G-O0-NEXT:    s_mov_b32 s14, 0
 ; GFX9-G-O0-NEXT:    s_mov_b32 s10, 0
-; GFX9-G-O0-NEXT:    v_sub_co_u32_e64 v6, s[8:9], v5, v6
-; GFX9-G-O0-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    v_sub_co_u32_e64 v7, s[8:9], v4, v5
+; GFX9-G-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, s16
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, s16
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, s16
-; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v7, s[8:9], v5, v7, s[8:9]
-; GFX9-G-O0-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v4, s[8:9], v4, v5, s[8:9]
+; GFX9-G-O0-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, s15
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, s14
 ; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v9, s[8:9], v5, v8, s[8:9]
@@ -1659,8 +1621,8 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, s10
 ; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v8, s[8:9], v5, v8, s[8:9]
 ; GFX9-G-O0-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v6
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, v7
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v7
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, v4
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v12, v9
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, v8
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v15, s5
@@ -1685,35 +1647,27 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-G-O0-NEXT:    v_or_b32_e64 v5, v5, v10
 ; GFX9-G-O0-NEXT:    s_mov_b32 s7, 0x7f
 ; GFX9-G-O0-NEXT:    s_mov_b32 s6, 0
-; GFX9-G-O0-NEXT:    v_xor_b32_e64 v6, v6, s7
-; GFX9-G-O0-NEXT:    v_xor_b32_e64 v7, v7, s6
-; GFX9-G-O0-NEXT:    v_or_b32_e64 v6, v6, v9
-; GFX9-G-O0-NEXT:    v_or_b32_e64 v8, v7, v8
-; GFX9-G-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, v8
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, s5
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, s4
-; GFX9-G-O0-NEXT:    v_cmp_eq_u64_e64 s[4:5], v[6:7], v[8:9]
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, v2
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, v1
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v4
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, v3
-; GFX9-G-O0-NEXT:    v_and_b32_e32 v1, 1, v5
-; GFX9-G-O0-NEXT:    v_cmp_ne_u32_e64 s[6:7], 0, v1
+; GFX9-G-O0-NEXT:    v_xor_b32_e64 v7, v7, s7
+; GFX9-G-O0-NEXT:    v_xor_b32_e64 v4, v4, s6
+; GFX9-G-O0-NEXT:    v_or_b32_e64 v7, v7, v9
+; GFX9-G-O0-NEXT:    v_or_b32_e64 v4, v4, v8
+; GFX9-G-O0-NEXT:    ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, v4
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, s5
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, s4
+; GFX9-G-O0-NEXT:    v_cmp_eq_u64_e64 s[4:5], v[7:8], v[9:10]
+; GFX9-G-O0-NEXT:    v_and_b32_e32 v4, 1, v5
+; GFX9-G-O0-NEXT:    v_cmp_ne_u32_e64 s[6:7], 0, v4
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, 0
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, 0
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, 0
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, v6
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v7
-; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v1, v1, v4, s[6:7]
-; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v3, v2, v3, s[6:7]
+; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v1, v1, v7, s[6:7]
+; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v4, v2, v4, s[6:7]
 ; GFX9-G-O0-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v3
-; GFX9-G-O0-NEXT:    v_and_b32_e32 v3, 1, v5
-; GFX9-G-O0-NEXT:    v_cmp_ne_u32_e64 s[6:7], 0, v3
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v4
+; GFX9-G-O0-NEXT:    v_and_b32_e32 v4, 1, v5
+; GFX9-G-O0-NEXT:    v_cmp_ne_u32_e64 s[6:7], 0, v4
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, 0
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, 0
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, v8
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, v9
 ; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v6, v6, v7, s[6:7]
 ; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v3, v3, v4, s[6:7]
 ; GFX9-G-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
@@ -1883,10 +1837,10 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-G-O0-NEXT:    v_readlane_b32 s6, v16, 6
 ; GFX9-G-O0-NEXT:    v_readlane_b32 s7, v16, 7
-; GFX9-G-O0-NEXT:    buffer_load_dword v12, off, s[0:3], s32 offset:212 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v13, off, s[0:3], s32 offset:216 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v14, off, s[0:3], s32 offset:220 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v15, off, s[0:3], s32 offset:224 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v23, off, s[0:3], s32 offset:212 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v24, off, s[0:3], s32 offset:216 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v25, off, s[0:3], s32 offset:220 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v26, off, s[0:3], s32 offset:224 ; 4-byte Folded Reload
 ; GFX9-G-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:228 ; 4-byte Folded Reload
 ; GFX9-G-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:232 ; 4-byte Folded Reload
 ; GFX9-G-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:236 ; 4-byte Folded Reload
@@ -1899,14 +1853,14 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-G-O0-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:264 ; 4-byte Folded Reload
 ; GFX9-G-O0-NEXT:    buffer_load_dword v33, off, s[0:3], s32 offset:268 ; 4-byte Folded Reload
 ; GFX9-G-O0-NEXT:    buffer_load_dword v34, off, s[0:3], s32 offset:272 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v25, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v26, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v27, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v28, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v21, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v8, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v22, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v11, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
 ; GFX9-G-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:276 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v8, off, s[0:3], s32 offset:280 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v10, off, s[0:3], s32 offset:284 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v11, off, s[0:3], s32 offset:288 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v10, off, s[0:3], s32 offset:280 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v12, off, s[0:3], s32 offset:284 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v13, off, s[0:3], s32 offset:288 ; 4-byte Folded Reload
 ; GFX9-G-O0-NEXT:    s_mov_b64 s[4:5], 0
 ; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(16)
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, v2
@@ -1915,7 +1869,7 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, v5
 ; GFX9-G-O0-NEXT:    s_mov_b32 s8, 1
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, s8
-; GFX9-G-O0-NEXT:    v_lshlrev_b64 v[21:22], v2, v[0:1]
+; GFX9-G-O0-NEXT:    v_lshlrev_b64 v[14:15], v2, v[0:1]
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, s8
 ; GFX9-G-O0-NEXT:    v_lshlrev_b64 v[4:5], v2, v[3:4]
 ; GFX9-G-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr0 killed $exec
@@ -1929,9 +1883,9 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, v5
 ; GFX9-G-O0-NEXT:    v_or_b32_e64 v7, v2, v3
 ; GFX9-G-O0-NEXT:    v_or_b32_e64 v5, v0, v1
-; GFX9-G-O0-NEXT:    ; kill: def $vgpr0_vgpr1 killed $vgpr12_vgpr13 killed $exec
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, v14
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, v15
+; GFX9-G-O0-NEXT:    ; kill: def $vgpr0_vgpr1 killed $vgpr23_vgpr24 killed $exec
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, v25
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, v26
 ; GFX9-G-O0-NEXT:    ; kill: def $vgpr2 killed $vgpr0 killed $exec
 ; GFX9-G-O0-NEXT:    ; kill: def $vgpr1 killed $vgpr1 killed $vgpr0_vgpr1 killed $exec
 ; GFX9-G-O0-NEXT:    s_mov_b32 s9, 31
@@ -1939,81 +1893,73 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-G-O0-NEXT:    v_lshrrev_b32_e64 v3, v0, v1
 ; GFX9-G-O0-NEXT:    s_mov_b32 s9, 0
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, s9
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v21
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, v22
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v14
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, v15
 ; GFX9-G-O0-NEXT:    v_or_b32_e64 v4, v2, v3
 ; GFX9-G-O0-NEXT:    v_or_b32_e64 v9, v0, v1
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v12
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, v13
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v12, v14
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, v15
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v23
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, v24
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v14, v25
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v15, v26
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, s8
-; GFX9-G-O0-NEXT:    v_lshlrev_b64 v[23:24], v0, v[2:3]
+; GFX9-G-O0-NEXT:    v_lshlrev_b64 v[27:28], v0, v[2:3]
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, s8
-; GFX9-G-O0-NEXT:    v_lshlrev_b64 v[0:1], v0, v[12:13]
-; GFX9-G-O0-NEXT:    ; kill: def $vgpr12 killed $vgpr2 killed $exec
+; GFX9-G-O0-NEXT:    v_lshlrev_b64 v[0:1], v0, v[14:15]
+; GFX9-G-O0-NEXT:    ; kill: def $vgpr14 killed $vgpr2 killed $exec
 ; GFX9-G-O0-NEXT:    ; kill: def $vgpr3 killed $vgpr3 killed $vgpr2_vgpr3 killed $exec
 ; GFX9-G-O0-NEXT:    s_mov_b32 s8, 31
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, s8
-; GFX9-G-O0-NEXT:    v_lshrrev_b32_e64 v14, v2, v3
+; GFX9-G-O0-NEXT:    v_lshrrev_b32_e64 v23, v2, v3
 ; GFX9-G-O0-NEXT:    s_mov_b32 s8, 0
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, s8
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v12, v0
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v14, v0
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v1
 ; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(8)
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v29, v31
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v30, v32
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v21, v33
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v22, v34
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v25, v33
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v26, v34
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v0, v29
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, v30
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v15, v23
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, v24
-; GFX9-G-O0-NEXT:    v_or_b32_e64 v0, v0, v15
-; GFX9-G-O0-NEXT:    v_or_b32_e64 v13, v1, v13
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v24, v27
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v15, v28
+; GFX9-G-O0-NEXT:    v_or_b32_e64 v0, v0, v24
+; GFX9-G-O0-NEXT:    v_or_b32_e64 v15, v1, v15
 ; GFX9-G-O0-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, v13
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v15, v21
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, v22
-; GFX9-G-O0-NEXT:    v_or3_b32 v12, v12, v14, v15
-; GFX9-G-O0-NEXT:    v_or3_b32 v2, v2, v3, v13
-; GFX9-G-O0-NEXT:    ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, v2
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, v15
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v24, v25
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v15, v26
+; GFX9-G-O0-NEXT:    v_or3_b32 v14, v14, v23, v24
+; GFX9-G-O0-NEXT:    v_or3_b32 v2, v2, v3, v15
+; GFX9-G-O0-NEXT:    ; kill: def $vgpr14 killed $vgpr14 def $vgpr14_vgpr15 killed $exec
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v15, v2
 ; GFX9-G-O0-NEXT:    ; kill: def $vgpr0_vgpr1 killed $vgpr0_vgpr1 def $vgpr0_vgpr1_vgpr2_vgpr3 killed $exec
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v12
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, v13
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v14
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, v15
 ; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-G-O0-NEXT:    v_sub_co_u32_e64 v11, s[8:9], v11, v4
-; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v10, s[8:9], v10, v9, s[8:9]
-; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v8, s[8:9], v8, v7, s[8:9]
-; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v10, s[8:9], v6, v5, s[8:9]
+; GFX9-G-O0-NEXT:    v_sub_co_u32_e64 v13, s[8:9], v13, v4
+; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v12, s[8:9], v12, v9, s[8:9]
+; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v10, s[8:9], v10, v7, s[8:9]
+; GFX9-G-O0-NEXT:    v_subb_co_u32_e64 v12, s[8:9], v6, v5, s[8:9]
 ; GFX9-G-O0-NEXT:    s_mov_b32 s8, 31
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, s8
-; GFX9-G-O0-NEXT:    v_ashrrev_i32_e64 v8, v6, v10
+; GFX9-G-O0-NEXT:    v_ashrrev_i32_e64 v10, v6, v12
 ; GFX9-G-O0-NEXT:    s_mov_b32 s8, 31
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, s8
-; GFX9-G-O0-NEXT:    v_ashrrev_i32_e64 v6, v6, v10
+; GFX9-G-O0-NEXT:    v_ashrrev_i32_e64 v6, v6, v12
 ; GFX9-G-O0-NEXT:    s_mov_b32 s9, 1
 ; GFX9-G-O0-NEXT:    s_mov_b32 s8, 0
-; GFX9-G-O0-NEXT:    v_and_b32_e64 v12, v8, s9
-; GFX9-G-O0-NEXT:    v_and_b32_e64 v10, v8, s8
+; GFX9-G-O0-NEXT:    v_and_b32_e64 v12, v10, s9
+; GFX9-G-O0-NEXT:    v_and_b32_e64 v14, v10, s8
 ; GFX9-G-O0-NEXT:    ; kill: def $vgpr12 killed $vgpr12 def $vgpr12_vgpr13 killed $exec
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, v10
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, s5
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, s4
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, v14
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v24, s5
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v23, s4
 ; GFX9-G-O0-NEXT:    ; kill: def $vgpr12_vgpr13 killed $vgpr12_vgpr13 def $vgpr12_vgpr13_vgpr14_vgpr15 killed $exec
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v15, v11
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v14, v10
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v23, v25
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v24, v26
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v21, v27
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v22, v28
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, v23
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v24
-; GFX9-G-O0-NEXT:    v_and_b32_e64 v11, v8, v11
-; GFX9-G-O0-NEXT:    v_and_b32_e64 v10, v8, v10
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, v21
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v21, v22
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v14, v23
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v15, v24
+; GFX9-G-O0-NEXT:    v_and_b32_e64 v11, v10, v11
+; GFX9-G-O0-NEXT:    v_and_b32_e64 v10, v10, v22
 ; GFX9-G-O0-NEXT:    v_and_b32_e64 v8, v6, v8
 ; GFX9-G-O0-NEXT:    v_and_b32_e64 v6, v6, v21
 ; GFX9-G-O0-NEXT:    v_sub_co_u32_e64 v4, s[8:9], v4, v11
@@ -2114,66 +2060,62 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-G-O0-NEXT:    s_or_saveexec_b64 s[20:21], -1
 ; GFX9-G-O0-NEXT:    buffer_load_dword v12, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GFX9-G-O0-NEXT:    s_mov_b64 exec, s[20:21]
-; GFX9-G-O0-NEXT:    buffer_load_dword v17, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v18, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v19, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v20, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v13, off, s[0:3], s32 offset:324 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v13, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v14, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v15, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v16, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v17, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v18, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v19, off, s[0:3], s32 offset:324 ; 4-byte Folded Reload
 ; GFX9-G-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v21, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
 ; GFX9-G-O0-NEXT:    s_mov_b32 s4, 64
-; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v16, v5
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v15, v4
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v22, v7
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v21, v6
+; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(3)
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v23, v18
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v24, v17
+; GFX9-G-O0-NEXT:    ; kill: def $vgpr21 killed $vgpr21 def $vgpr21_vgpr22 killed $exec
+; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(1)
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v22, v4
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, s4
-; GFX9-G-O0-NEXT:    v_sub_u32_e64 v4, v13, v4
+; GFX9-G-O0-NEXT:    v_sub_u32_e64 v4, v19, v4
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, s4
-; GFX9-G-O0-NEXT:    v_sub_u32_e64 v5, v5, v13
+; GFX9-G-O0-NEXT:    v_sub_u32_e64 v5, v5, v19
 ; GFX9-G-O0-NEXT:    s_mov_b32 s6, 0
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, s4
-; GFX9-G-O0-NEXT:    v_cmp_lt_u32_e64 s[4:5], v13, v6
+; GFX9-G-O0-NEXT:    v_cmp_lt_u32_e64 s[4:5], v19, v6
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, s6
-; GFX9-G-O0-NEXT:    v_cmp_eq_u32_e64 s[6:7], v13, v6
-; GFX9-G-O0-NEXT:    v_lshrrev_b64 v[6:7], v13, v[21:22]
-; GFX9-G-O0-NEXT:    v_lshrrev_b64 v[26:27], v13, v[15:16]
+; GFX9-G-O0-NEXT:    v_cmp_eq_u32_e64 s[6:7], v19, v6
+; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-G-O0-NEXT:    v_lshrrev_b64 v[6:7], v19, v[21:22]
+; GFX9-G-O0-NEXT:    v_lshrrev_b64 v[26:27], v19, v[23:24]
 ; GFX9-G-O0-NEXT:    v_lshlrev_b64 v[24:25], v5, v[21:22]
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v14, v26
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v20, v26
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, v27
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v23, v24
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, v25
-; GFX9-G-O0-NEXT:    v_or_b32_e64 v14, v14, v23
-; GFX9-G-O0-NEXT:    v_or_b32_e64 v13, v5, v13
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v19, v25
+; GFX9-G-O0-NEXT:    v_or_b32_e64 v20, v20, v23
+; GFX9-G-O0-NEXT:    v_or_b32_e64 v19, v5, v19
 ; GFX9-G-O0-NEXT:    s_mov_b64 s[8:9], 0
 ; GFX9-G-O0-NEXT:    v_lshrrev_b64 v[21:22], v4, v[21:22]
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, v21
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, v22
-; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v4, v4, v14, s[4:5]
-; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v5, v5, v13, s[4:5]
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v14, v15
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, v16
-; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v4, v4, v14, s[6:7]
-; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v13, v5, v13, s[6:7]
+; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v4, v4, v20, s[4:5]
+; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v5, v5, v19, s[4:5]
+; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v4, v4, v18, s[6:7]
+; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v17, v5, v17, s[6:7]
 ; GFX9-G-O0-NEXT:    ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, v13
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v14, v6
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, v17
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v18, v6
 ; GFX9-G-O0-NEXT:    ; kill: def $vgpr7 killed $vgpr7 killed $vgpr6_vgpr7 killed $exec
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, 0
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v17, 0
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, 0
-; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v13, v13, v14, s[4:5]
+; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v17, v17, v18, s[4:5]
 ; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v6, v6, v7, s[4:5]
-; GFX9-G-O0-NEXT:    ; kill: def $vgpr13 killed $vgpr13 def $vgpr13_vgpr14 killed $exec
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v14, v6
+; GFX9-G-O0-NEXT:    ; kill: def $vgpr17 killed $vgpr17 def $vgpr17_vgpr18 killed $exec
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v18, v6
 ; GFX9-G-O0-NEXT:    ; kill: def $vgpr4_vgpr5 killed $vgpr4_vgpr5 def $vgpr4_vgpr5_vgpr6_vgpr7 killed $exec
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, v13
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, v14
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v16, v17
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v15, v18
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v14, v19
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, v20
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, v17
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, v18
 ; GFX9-G-O0-NEXT:    s_mov_b32 s4, -1
 ; GFX9-G-O0-NEXT:    s_mov_b32 s10, -1
 ; GFX9-G-O0-NEXT:    s_mov_b32 s7, -1
@@ -2226,14 +2168,14 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-G-O0-NEXT:    s_or_saveexec_b64 s[20:21], -1
 ; GFX9-G-O0-NEXT:    buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
 ; GFX9-G-O0-NEXT:    s_mov_b64 exec, s[20:21]
-; GFX9-G-O0-NEXT:    buffer_load_dword v9, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload
 ; GFX9-G-O0-NEXT:    buffer_load_dword v10, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v11, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v12, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
-; GFX9-G-O0-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v13, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
+; GFX9-G-O0-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
 ; GFX9-G-O0-NEXT:    s_mov_b64 s[4:5], 0
 ; GFX9-G-O0-NEXT:    s_mov_b32 s6, 1
 ; GFX9-G-O0-NEXT:    s_mov_b32 s10, 0
@@ -2241,48 +2183,50 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-G-O0-NEXT:    s_mov_b32 s8, 0
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v5, s6
 ; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(3)
-; GFX9-G-O0-NEXT:    v_add_co_u32_e64 v5, s[6:7], v2, v5
+; GFX9-G-O0-NEXT:    v_add_co_u32_e64 v5, s[6:7], v3, v5
 ; GFX9-G-O0-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:324 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v6, s10
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, s10
 ; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(1)
-; GFX9-G-O0-NEXT:    v_addc_co_u32_e64 v6, s[6:7], v4, v6, s[6:7]
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, s9
-; GFX9-G-O0-NEXT:    v_addc_co_u32_e64 v8, s[6:7], v3, v4, s[6:7]
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, s8
-; GFX9-G-O0-NEXT:    v_addc_co_u32_e64 v7, s[6:7], v1, v3, s[6:7]
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, v5
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v14, v6
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v15, v8
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v16, v7
-; GFX9-G-O0-NEXT:    buffer_store_dword v13, off, s[0:3], s32 offset:308 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    v_addc_co_u32_e64 v6, s[6:7], v6, v8, s[6:7]
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v8, s9
+; GFX9-G-O0-NEXT:    v_addc_co_u32_e64 v8, s[6:7], v7, v8, s[6:7]
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v7, s8
+; GFX9-G-O0-NEXT:    v_addc_co_u32_e64 v7, s[6:7], v2, v7, s[6:7]
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v14, v5
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v15, v6
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v16, v8
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v17, v7
+; GFX9-G-O0-NEXT:    buffer_store_dword v14, off, s[0:3], s32 offset:308 ; 4-byte Folded Spill
 ; GFX9-G-O0-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-G-O0-NEXT:    buffer_store_dword v14, off, s[0:3], s32 offset:312 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    buffer_store_dword v15, off, s[0:3], s32 offset:316 ; 4-byte Folded Spill
-; GFX9-G-O0-NEXT:    buffer_store_dword v16, off, s[0:3], s32 offset:320 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v15, off, s[0:3], s32 offset:312 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v16, off, s[0:3], s32 offset:316 ; 4-byte Folded Spill
+; GFX9-G-O0-NEXT:    buffer_store_dword v17, off, s[0:3], s32 offset:320 ; 4-byte Folded Spill
 ; GFX9-G-O0-NEXT:    s_mov_b32 s6, 0x7f
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, s6
-; GFX9-G-O0-NEXT:    v_sub_co_u32_e64 v4, s[6:7], v1, v2
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, s6
+; GFX9-G-O0-NEXT:    v_sub_co_u32_e64 v9, s[6:7], v2, v3
 ; GFX9-G-O0-NEXT:    s_mov_b32 s7, 64
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v14, v10
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v13, v9
+; GFX9-G-O0-NEXT:    ; kill: def $vgpr13 killed $vgpr13 def $vgpr13_vgpr14 killed $exec
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v14, v1
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, v10
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v12, v4
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, s7
-; GFX9-G-O0-NEXT:    v_sub_u32_e64 v3, v4, v1
+; GFX9-G-O0-NEXT:    v_sub_u32_e64 v3, v9, v1
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, s7
-; GFX9-G-O0-NEXT:    v_sub_u32_e64 v9, v1, v4
+; GFX9-G-O0-NEXT:    v_sub_u32_e64 v15, v1, v9
 ; GFX9-G-O0-NEXT:    s_mov_b32 s6, 0
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, s7
-; GFX9-G-O0-NEXT:    v_cmp_lt_u32_e64 s[8:9], v4, v1
+; GFX9-G-O0-NEXT:    v_cmp_lt_u32_e64 s[8:9], v9, v1
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v1, s6
-; GFX9-G-O0-NEXT:    v_cmp_eq_u32_e64 s[6:7], v4, v1
-; GFX9-G-O0-NEXT:    v_lshlrev_b64 v[1:2], v4, v[13:14]
-; GFX9-G-O0-NEXT:    v_lshrrev_b64 v[18:19], v9, v[13:14]
-; GFX9-G-O0-NEXT:    v_lshlrev_b64 v[16:17], v4, v[11:12]
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v18
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, v19
+; GFX9-G-O0-NEXT:    v_cmp_eq_u32_e64 s[6:7], v9, v1
+; GFX9-G-O0-NEXT:    v_lshlrev_b64 v[1:2], v9, v[13:14]
+; GFX9-G-O0-NEXT:    v_lshrrev_b64 v[18:19], v15, v[13:14]
+; GFX9-G-O0-NEXT:    v_lshlrev_b64 v[16:17], v9, v[11:12]
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v12, v18
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v19
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v15, v16
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v17
-; GFX9-G-O0-NEXT:    v_or_b32_e64 v10, v10, v15
-; GFX9-G-O0-NEXT:    v_or_b32_e64 v4, v4, v9
+; GFX9-G-O0-NEXT:    v_mov_b32_e32 v11, v17
+; GFX9-G-O0-NEXT:    v_or_b32_e64 v12, v12, v15
+; GFX9-G-O0-NEXT:    v_or_b32_e64 v11, v9, v11
 ; GFX9-G-O0-NEXT:    v_lshlrev_b64 v[13:14], v3, v[13:14]
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v1
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, v2
@@ -2294,10 +2238,8 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v2, v3
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v9, v13
 ; GFX9-G-O0-NEXT:    v_mov_b32_e32 v3, v14
-; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v9, v9, v10, s[8:9]
-; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v3, v3, v4, s[8:9]
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v10, v11
-; GFX9-G-O0-NEXT:    v_mov_b32_e32 v4, v12
+; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v9, v9, v12, s[8:9]
+; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v3, v3, v11, s[8:9]
 ; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v9, v9, v10, s[6:7]
 ; GFX9-G-O0-NEXT:    v_cndmask_b32_e64 v3, v3, v4, s[6:7]
 ; GFX9-G-O0-NEXT:    ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec

>From 929566d2b08a9c64d768bfbce2c4fec21c34ab72 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= <schuett at gmail.com>
Date: Fri, 24 May 2024 13:09:28 +0200
Subject: [PATCH 4/6] add observer

---
 llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp       |  5 +++--
 .../CodeGen/AArch64/GlobalISel/combine-freeze.mir    | 12 ++++++------
 2 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index abecee4259030..23b8403657b26 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -265,8 +265,9 @@ bool CombinerHelper::matchFreezeOfSingleMaybePoisonOperand(
     }
   }
 
-  // FIXME: observer must be aware of dropping
-  //  cast<GenericMachineInstr>(OrigDef)->dropPoisonGeneratingFlags();
+  Observer.changingInstr(*OrigDef);
+  cast<GenericMachineInstr>(OrigDef)->dropPoisonGeneratingFlags();
+  Observer.changedInstr(*OrigDef);
 
   // Eliminate freeze if all operands are guaranteed non-poison.
   if (!MaybePoisonOperand) {
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-freeze.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-freeze.mir
index dfa45d96fe94c..0d41be09747c9 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-freeze.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-freeze.mir
@@ -189,8 +189,8 @@ body:             |
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: %cst:_(s32) = G_CONSTANT i32 9
     ; CHECK-NEXT: %c:_(s32) = G_CONSTANT_FOLD_BARRIER %cst
-    ; CHECK-NEXT: %3:_(s64) = nneg G_ZEXT %c(s32)
-    ; CHECK-NEXT: $x0 = COPY %3(s64)
+    ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT %c(s32)
+    ; CHECK-NEXT: $x0 = COPY [[ZEXT]](s64)
     ; CHECK-NEXT: RET_ReallyLR implicit $x0
     %0:_(s64) = COPY $x0
     %cst:_(s32) = G_CONSTANT i32 9
@@ -255,7 +255,7 @@ body:             |
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: %cst:_(s64) = G_CONSTANT i64 9
     ; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
-    ; CHECK-NEXT: [[UDIV:%[0-9]+]]:_(s64) = exact G_UDIV %c, %c
+    ; CHECK-NEXT: [[UDIV:%[0-9]+]]:_(s64) = G_UDIV %c, %c
     ; CHECK-NEXT: $x0 = COPY [[UDIV]](s64)
     ; CHECK-NEXT: RET_ReallyLR implicit $x0
     %0:_(s64) = COPY $x0
@@ -299,7 +299,7 @@ body:             |
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: %cst:_(s64) = G_CONSTANT i64 9
     ; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
-    ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = nsw G_MUL %c, %c
+    ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL %c, %c
     ; CHECK-NEXT: $x0 = COPY [[MUL]](s64)
     ; CHECK-NEXT: RET_ReallyLR implicit $x0
     %0:_(s64) = COPY $x0
@@ -343,7 +343,7 @@ body:             |
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: %cst:_(s64) = G_CONSTANT i64 9
     ; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
-    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = nuw G_TRUNC %c(s64)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %c(s64)
     ; CHECK-NEXT: $w0 = COPY [[TRUNC]](s32)
     ; CHECK-NEXT: RET_ReallyLR implicit $q0
     %0:_(s64) = COPY $x0
@@ -387,7 +387,7 @@ body:             |
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: %cst:_(s64) = G_CONSTANT i64 9
     ; CHECK-NEXT: %c:_(s64) = G_CONSTANT_FOLD_BARRIER %cst
-    ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = nuw G_ADD %c, %c
+    ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD %c, %c
     ; CHECK-NEXT: $x0 = COPY [[ADD]](s64)
     ; CHECK-NEXT: RET_ReallyLR implicit $x0
     %0:_(s64) = COPY $x0

>From 62a99c80e27ca964952c05e3800fac4fa0d16193 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= <schuett at gmail.com>
Date: Fri, 24 May 2024 13:47:26 +0200
Subject: [PATCH 5/6] remove freeze combine

---
 .../llvm/CodeGen/GlobalISel/CombinerHelper.h  |  4 --
 .../include/llvm/Target/GlobalISel/Combine.td |  8 ---
 .../GlobalISel/CombinerHelperVectorOps.cpp    | 53 -------------------
 3 files changed, 65 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 2111e82e1a99d..2ddf20ebe7af7 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -840,10 +840,6 @@ class CombinerHelper {
   /// Combine extract vector element.
   bool matchExtractVectorElement(MachineInstr &MI, BuildFnTy &MatchInfo);
 
-  /// Combine extract vector element with freeze on the vector register.
-  bool matchExtractVectorElementWithFreeze(const MachineOperand &MO,
-                                           BuildFnTy &MatchInfo);
-
   /// Combine extract vector element with a build vector on the vector register.
   bool matchExtractVectorElementWithBuildVector(const MachineOperand &MO,
                                                 BuildFnTy &MatchInfo);
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 47f73daf20891..36001162b3dc8 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1532,13 +1532,6 @@ def extract_vector_element_build_vector_trunc8 : GICombineRule<
    [{ return Helper.matchExtractVectorElementWithBuildVectorTrunc(${root}, ${matchinfo}); }]),
    (apply [{ Helper.applyBuildFnMO(${root}, ${matchinfo}); }])>;
 
-def extract_vector_element_freeze : GICombineRule<
-   (defs root:$root, build_fn_matchinfo:$matchinfo),
-   (match (G_FREEZE $src, $input),
-          (G_EXTRACT_VECTOR_ELT $root, $src, $idx),
-   [{ return Helper.matchExtractVectorElementWithFreeze(${root}, ${matchinfo}); }]),
-   (apply [{ Helper.applyBuildFnMO(${root}, ${matchinfo}); }])>;
-
 def sext_trunc : GICombineRule<
    (defs root:$root, build_fn_matchinfo:$matchinfo),
    (match (G_TRUNC $src, $x, (MIFlags NoSWrap)),
@@ -1636,7 +1629,6 @@ extract_vector_element_build_vector_trunc5,
 extract_vector_element_build_vector_trunc6,
 extract_vector_element_build_vector_trunc7,
 extract_vector_element_build_vector_trunc8,
-//extract_vector_element_freeze,
 extract_vector_element_shuffle_vector,
 insert_vector_element_extract_vector_element
 ]>;
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelperVectorOps.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelperVectorOps.cpp
index 21b1eb2628174..b4765fb280f9d 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelperVectorOps.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelperVectorOps.cpp
@@ -144,59 +144,6 @@ bool CombinerHelper::matchExtractVectorElementWithDifferentIndices(
   return false;
 }
 
-bool CombinerHelper::matchExtractVectorElementWithFreeze(
-    const MachineOperand &MO, BuildFnTy &MatchInfo) {
-  MachineInstr *Root = getDefIgnoringCopies(MO.getReg(), MRI);
-  GExtractVectorElement *Extract = cast<GExtractVectorElement>(Root);
-
-  Register Vector = Extract->getVectorReg();
-
-  //
-  //  %bv:_(<2 x s32>) = G_BUILD_VECTOR %arg1(s32), %arg2(s32)
-  //  %freeze:_(<2 x s32>) = G_FREEZE %bv(<2 x s32>)
-  //  %extract:_(s32) = G_EXTRACT_VECTOR_ELT %bv(<2 x s32>), %opaque(s64)
-  //
-  //  -->
-  //
-  //  %bv:_(<2 x s32>) = G_BUILD_VECTOR %arg1(s32), %arg2(s32)
-  //  %extract:_(s32) = G_EXTRACT_VECTOR_ELT %bv(<2 x s32>), %opaque(s64)
-  //  %freeze:_(s32) = G_FREEZE %extract(s32)
-  //
-  //
-
-  // For G_FREEZE, the input and the output types are identical. Moving the
-  // freeze from the Vector into the front of the extract preserves the freeze
-  // semantics. The result is still freeze'd. Furthermore, the Vector register
-  // becomes easier to analyze. A build vector could have been hidden behind the
-  // freeze.
-
-  // We expect a freeze on the Vector register.
-  GFreeze *Freeze = getOpcodeDef<GFreeze>(Vector, MRI);
-  if (!Freeze)
-    return false;
-
-  Register Dst = Extract->getReg(0);
-  LLT DstTy = MRI.getType(Dst);
-
-  // We first have to check for one-use and legality of the freeze.
-  // The type of the extractVectorElement did not change.
-  if (!MRI.hasOneNonDBGUse(Freeze->getReg(0)) ||
-      !isLegalOrBeforeLegalizer({TargetOpcode::G_FREEZE, {DstTy}}))
-    return false;
-
-  Register Index = Extract->getIndexReg();
-
-  // We move the freeze from the Vector register in front of the
-  // extractVectorElement.
-  MatchInfo = [=](MachineIRBuilder &B) {
-    auto Extract =
-        B.buildExtractVectorElement(DstTy, Freeze->getSourceReg(), Index);
-    B.buildFreeze(Dst, Extract);
-  };
-
-  return true;
-}
-
 bool CombinerHelper::matchExtractVectorElementWithBuildVector(
     const MachineOperand &MO, BuildFnTy &MatchInfo) {
   MachineInstr *Root = getDefIgnoringCopies(MO.getReg(), MRI);

>From 017df6bd4f47b369992acecfa111fa9900c5104c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= <schuett at gmail.com>
Date: Sat, 25 May 2024 17:40:48 +0200
Subject: [PATCH 6/6] address review comments

---
 .../include/llvm/Target/GlobalISel/Combine.td |   4 +-
 .../AArch64/GlobalISel/combine-freeze.mir     |   7 +-
 llvm/test/CodeGen/AArch64/fast-isel-select.ll | 222 ++++++++++++++----
 3 files changed, 183 insertions(+), 50 deletions(-)

diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 36001162b3dc8..383589add7755 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1705,14 +1705,14 @@ def integer_reassoc_combines: GICombineGroup<[
   APlusBMinusCPlusA
 ]>;
 
-def freeze_of_not_undef_and_poison : GICombineRule<
+def freeze_of_non_undef_non_poison : GICombineRule<
    (defs root:$root),
    (match (G_FREEZE $root, $src),
           [{ return isGuaranteedNotToBeUndefOrPoison(${src}.getReg(), MRI); }]),
    (apply (GIReplaceReg $root, $src))>;
 
 def freeze_combines: GICombineGroup<[
-  freeze_of_not_undef_and_poison,
+  freeze_of_non_undef_non_poison,
   push_freeze_to_prevent_poison_from_propagating
 ]>;
 
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-freeze.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-freeze.mir
index 0d41be09747c9..2450f109c406a 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-freeze.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-freeze.mir
@@ -16,8 +16,8 @@ body:             |
     ; CHECK-NEXT: $x0 = COPY [[FREEZE]](s64)
     ; CHECK-NEXT: RET_ReallyLR implicit $x0
     %0:_(s64) = COPY $x0
-    %2:_(s64) = G_FREEZE %0
-    $x0 = COPY %2(s64)
+    %1:_(s64) = G_FREEZE %0
+    $x0 = COPY %1(s64)
     RET_ReallyLR implicit $x0
 
 ...
@@ -33,7 +33,6 @@ body:             |
     ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 9
     ; CHECK-NEXT: $x0 = COPY [[C]](s64)
     ; CHECK-NEXT: RET_ReallyLR implicit $x0
-    %0:_(s64) = COPY $x0
     %1:_(s64) = G_CONSTANT i64 9
     %2:_(s64) = G_FREEZE %1
     $x0 = COPY %2(s64)
@@ -52,7 +51,6 @@ body:             |
     ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 9.000000e+00
     ; CHECK-NEXT: $x0 = COPY [[C]](s64)
     ; CHECK-NEXT: RET_ReallyLR implicit $x0
-    %0:_(s64) = COPY $x0
     %1:_(s64) = G_FCONSTANT double 9.0
     %2:_(s64) = G_FREEZE %1
     $x0 = COPY %2(s64)
@@ -71,7 +69,6 @@ body:             |
     ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[DEF]]
     ; CHECK-NEXT: $x0 = COPY [[FREEZE]](s64)
     ; CHECK-NEXT: RET_ReallyLR implicit $x0
-    %0:_(s64) = COPY $x0
     %1:_(s64) = G_IMPLICIT_DEF
     %2:_(s64) = G_FREEZE %1
     $x0 = COPY %2(s64)
diff --git a/llvm/test/CodeGen/AArch64/fast-isel-select.ll b/llvm/test/CodeGen/AArch64/fast-isel-select.ll
index 9184066fc8107..65701343ccc1e 100644
--- a/llvm/test/CodeGen/AArch64/fast-isel-select.ll
+++ b/llvm/test/CodeGen/AArch64/fast-isel-select.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc -mtriple=aarch64-apple-darwin                             -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort=1 -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=aarch64-apple-darwin -global-isel -verify-machineinstrs < %s | FileCheck %s --check-prefix=GISEL
+; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-FASTISEL
+; RUN: llc -mtriple=aarch64-apple-darwin -global-isel -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-GISEL
 
 ; First test the different supported value types for select.
 define zeroext i1 @select_i1(i1 zeroext %c, i1 zeroext %a, i1 zeroext %b) {
@@ -10,6 +10,18 @@ define zeroext i1 @select_i1(i1 zeroext %c, i1 zeroext %a, i1 zeroext %b) {
 ; GISEL-NEXT:    tst w0, #0x1
 ; GISEL-NEXT:    csel w0, w1, w2, ne
 ; GISEL-NEXT:    ret
+; CHECK-FASTISEL-LABEL: select_i1:
+; CHECK-FASTISEL:       ; %bb.0:
+; CHECK-FASTISEL-NEXT:    tst w0, #0x1
+; CHECK-FASTISEL-NEXT:    csel w8, w1, w2, ne
+; CHECK-FASTISEL-NEXT:    and w0, w8, #0x1
+; CHECK-FASTISEL-NEXT:    ret
+;
+; CHECK-GISEL-LABEL: select_i1:
+; CHECK-GISEL:       ; %bb.0:
+; CHECK-GISEL-NEXT:    tst w0, #0x1
+; CHECK-GISEL-NEXT:    csel w0, w1, w2, ne
+; CHECK-GISEL-NEXT:    ret
   %1 = select i1 %c, i1 %a, i1 %b
   ret i1 %1
 }
@@ -20,6 +32,18 @@ define zeroext i8 @select_i8(i1 zeroext %c, i8 zeroext %a, i8 zeroext %b) {
 ; GISEL-NEXT:    tst w0, #0x1
 ; GISEL-NEXT:    csel w0, w1, w2, ne
 ; GISEL-NEXT:    ret
+; CHECK-FASTISEL-LABEL: select_i8:
+; CHECK-FASTISEL:       ; %bb.0:
+; CHECK-FASTISEL-NEXT:    tst w0, #0x1
+; CHECK-FASTISEL-NEXT:    csel w8, w1, w2, ne
+; CHECK-FASTISEL-NEXT:    uxtb w0, w8
+; CHECK-FASTISEL-NEXT:    ret
+;
+; CHECK-GISEL-LABEL: select_i8:
+; CHECK-GISEL:       ; %bb.0:
+; CHECK-GISEL-NEXT:    tst w0, #0x1
+; CHECK-GISEL-NEXT:    csel w0, w1, w2, ne
+; CHECK-GISEL-NEXT:    ret
   %1 = select i1 %c, i8 %a, i8 %b
   ret i8 %1
 }
@@ -30,6 +54,18 @@ define zeroext i16 @select_i16(i1 zeroext %c, i16 zeroext %a, i16 zeroext %b) {
 ; GISEL-NEXT:    tst w0, #0x1
 ; GISEL-NEXT:    csel w0, w1, w2, ne
 ; GISEL-NEXT:    ret
+; CHECK-FASTISEL-LABEL: select_i16:
+; CHECK-FASTISEL:       ; %bb.0:
+; CHECK-FASTISEL-NEXT:    tst w0, #0x1
+; CHECK-FASTISEL-NEXT:    csel w8, w1, w2, ne
+; CHECK-FASTISEL-NEXT:    uxth w0, w8
+; CHECK-FASTISEL-NEXT:    ret
+;
+; CHECK-GISEL-LABEL: select_i16:
+; CHECK-GISEL:       ; %bb.0:
+; CHECK-GISEL-NEXT:    tst w0, #0x1
+; CHECK-GISEL-NEXT:    csel w0, w1, w2, ne
+; CHECK-GISEL-NEXT:    ret
   %1 = select i1 %c, i16 %a, i16 %b
   ret i16 %1
 }
@@ -40,6 +76,17 @@ define i32 @select_i32(i1 zeroext %c, i32 %a, i32 %b) {
 ; GISEL-NEXT:    tst w0, #0x1
 ; GISEL-NEXT:    csel w0, w1, w2, ne
 ; GISEL-NEXT:    ret
+; CHECK-FASTISEL-LABEL: select_i32:
+; CHECK-FASTISEL:       ; %bb.0:
+; CHECK-FASTISEL-NEXT:    tst w0, #0x1
+; CHECK-FASTISEL-NEXT:    csel w0, w1, w2, ne
+; CHECK-FASTISEL-NEXT:    ret
+;
+; CHECK-GISEL-LABEL: select_i32:
+; CHECK-GISEL:       ; %bb.0:
+; CHECK-GISEL-NEXT:    tst w0, #0x1
+; CHECK-GISEL-NEXT:    csel w0, w1, w2, ne
+; CHECK-GISEL-NEXT:    ret
   %1 = select i1 %c, i32 %a, i32 %b
   ret i32 %1
 }
@@ -50,6 +97,17 @@ define i64 @select_i64(i1 zeroext %c, i64 %a, i64 %b) {
 ; GISEL-NEXT:    tst w0, #0x1
 ; GISEL-NEXT:    csel x0, x1, x2, ne
 ; GISEL-NEXT:    ret
+; CHECK-FASTISEL-LABEL: select_i64:
+; CHECK-FASTISEL:       ; %bb.0:
+; CHECK-FASTISEL-NEXT:    tst w0, #0x1
+; CHECK-FASTISEL-NEXT:    csel x0, x1, x2, ne
+; CHECK-FASTISEL-NEXT:    ret
+;
+; CHECK-GISEL-LABEL: select_i64:
+; CHECK-GISEL:       ; %bb.0:
+; CHECK-GISEL-NEXT:    tst w0, #0x1
+; CHECK-GISEL-NEXT:    csel x0, x1, x2, ne
+; CHECK-GISEL-NEXT:    ret
   %1 = select i1 %c, i64 %a, i64 %b
   ret i64 %1
 }
@@ -60,6 +118,17 @@ define float @select_f32(i1 zeroext %c, float %a, float %b) {
 ; GISEL-NEXT:    tst w0, #0x1
 ; GISEL-NEXT:    fcsel s0, s0, s1, ne
 ; GISEL-NEXT:    ret
+; CHECK-FASTISEL-LABEL: select_f32:
+; CHECK-FASTISEL:       ; %bb.0:
+; CHECK-FASTISEL-NEXT:    tst w0, #0x1
+; CHECK-FASTISEL-NEXT:    fcsel s0, s0, s1, ne
+; CHECK-FASTISEL-NEXT:    ret
+;
+; CHECK-GISEL-LABEL: select_f32:
+; CHECK-GISEL:       ; %bb.0:
+; CHECK-GISEL-NEXT:    tst w0, #0x1
+; CHECK-GISEL-NEXT:    fcsel s0, s0, s1, ne
+; CHECK-GISEL-NEXT:    ret
   %1 = select i1 %c, float %a, float %b
   ret float %1
 }
@@ -70,17 +139,33 @@ define double @select_f64(i1 zeroext %c, double %a, double %b) {
 ; GISEL-NEXT:    tst w0, #0x1
 ; GISEL-NEXT:    fcsel d0, d0, d1, ne
 ; GISEL-NEXT:    ret
+; CHECK-FASTISEL-LABEL: select_f64:
+; CHECK-FASTISEL:       ; %bb.0:
+; CHECK-FASTISEL-NEXT:    tst w0, #0x1
+; CHECK-FASTISEL-NEXT:    fcsel d0, d0, d1, ne
+; CHECK-FASTISEL-NEXT:    ret
+;
+; CHECK-GISEL-LABEL: select_f64:
+; CHECK-GISEL:       ; %bb.0:
+; CHECK-GISEL-NEXT:    tst w0, #0x1
+; CHECK-GISEL-NEXT:    fcsel d0, d0, d1, ne
+; CHECK-GISEL-NEXT:    ret
   %1 = select i1 %c, double %a, double %b
   ret double %1
 }
 
 ; Now test the folding of all compares.
 define float @select_fcmp_false(float %x, float %a, float %b) {
-; CHECK-LABEL: select_fcmp_false:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    fmov s0, s2
-; CHECK-NEXT:    ret
+; CHECK-FASTISEL-LABEL: select_fcmp_false:
+; CHECK-FASTISEL:       ; %bb.0:
+; CHECK-FASTISEL-NEXT:    fmov s0, s2
+; CHECK-FASTISEL-NEXT:    ret
 ;
+; CHECK-GISEL-LABEL: select_fcmp_false:
+; CHECK-GISEL:       ; %bb.0:
+; CHECK-GISEL-NEXT:    fcmp s0, s0
+; CHECK-GISEL-NEXT:    fcsel s0, s1, s2, gt
+; CHECK-GISEL-NEXT:    ret
 ; GISEL-LABEL: select_fcmp_false:
 ; GISEL:       ; %bb.0:
 ; GISEL-NEXT:    fcmp s0, s0
@@ -97,7 +182,6 @@ define float @select_fcmp_ogt(float %x, float %y, float %a, float %b) {
 ; CHECK-NEXT:    fcmp s0, s1
 ; CHECK-NEXT:    fcsel s0, s2, s3, gt
 ; CHECK-NEXT:    ret
-;
 ; GISEL-LABEL: select_fcmp_ogt:
 ; GISEL:       ; %bb.0:
 ; GISEL-NEXT:    fcmp s0, s1
@@ -114,7 +198,6 @@ define float @select_fcmp_oge(float %x, float %y, float %a, float %b) {
 ; CHECK-NEXT:    fcmp s0, s1
 ; CHECK-NEXT:    fcsel s0, s2, s3, ge
 ; CHECK-NEXT:    ret
-;
 ; GISEL-LABEL: select_fcmp_oge:
 ; GISEL:       ; %bb.0:
 ; GISEL-NEXT:    fcmp s0, s1
@@ -131,7 +214,6 @@ define float @select_fcmp_olt(float %x, float %y, float %a, float %b) {
 ; CHECK-NEXT:    fcmp s0, s1
 ; CHECK-NEXT:    fcsel s0, s2, s3, mi
 ; CHECK-NEXT:    ret
-;
 ; GISEL-LABEL: select_fcmp_olt:
 ; GISEL:       ; %bb.0:
 ; GISEL-NEXT:    fcmp s0, s1
@@ -148,7 +230,6 @@ define float @select_fcmp_ole(float %x, float %y, float %a, float %b) {
 ; CHECK-NEXT:    fcmp s0, s1
 ; CHECK-NEXT:    fcsel s0, s2, s3, ls
 ; CHECK-NEXT:    ret
-;
 ; GISEL-LABEL: select_fcmp_ole:
 ; GISEL:       ; %bb.0:
 ; GISEL-NEXT:    fcmp s0, s1
@@ -160,13 +241,22 @@ define float @select_fcmp_ole(float %x, float %y, float %a, float %b) {
 }
 
 define float @select_fcmp_one(float %x, float %y, float %a, float %b) {
-; CHECK-LABEL: select_fcmp_one:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    fcmp s0, s1
-; CHECK-NEXT:    fcsel s0, s2, s3, mi
-; CHECK-NEXT:    fcsel s0, s2, s0, gt
-; CHECK-NEXT:    ret
+; CHECK-FASTISEL-LABEL: select_fcmp_one:
+; CHECK-FASTISEL:       ; %bb.0:
+; CHECK-FASTISEL-NEXT:    fcmp s0, s1
+; CHECK-FASTISEL-NEXT:    fcsel s0, s2, s3, mi
+; CHECK-FASTISEL-NEXT:    fcsel s0, s2, s0, gt
+; CHECK-FASTISEL-NEXT:    ret
 ;
+; CHECK-GISEL-LABEL: select_fcmp_one:
+; CHECK-GISEL:       ; %bb.0:
+; CHECK-GISEL-NEXT:    fcmp s0, s1
+; CHECK-GISEL-NEXT:    cset w8, mi
+; CHECK-GISEL-NEXT:    cset w9, gt
+; CHECK-GISEL-NEXT:    orr w8, w8, w9
+; CHECK-GISEL-NEXT:    tst w8, #0x1
+; CHECK-GISEL-NEXT:    fcsel s0, s2, s3, ne
+; CHECK-GISEL-NEXT:    ret
 ; GISEL-LABEL: select_fcmp_one:
 ; GISEL:       ; %bb.0:
 ; GISEL-NEXT:    fcmp s0, s1
@@ -187,7 +277,6 @@ define float @select_fcmp_ord(float %x, float %y, float %a, float %b) {
 ; CHECK-NEXT:    fcmp s0, s1
 ; CHECK-NEXT:    fcsel s0, s2, s3, vc
 ; CHECK-NEXT:    ret
-;
 ; GISEL-LABEL: select_fcmp_ord:
 ; GISEL:       ; %bb.0:
 ; GISEL-NEXT:    fcmp s0, s1
@@ -204,7 +293,6 @@ define float @select_fcmp_uno(float %x, float %y, float %a, float %b) {
 ; CHECK-NEXT:    fcmp s0, s1
 ; CHECK-NEXT:    fcsel s0, s2, s3, vs
 ; CHECK-NEXT:    ret
-;
 ; GISEL-LABEL: select_fcmp_uno:
 ; GISEL:       ; %bb.0:
 ; GISEL-NEXT:    fcmp s0, s1
@@ -216,13 +304,22 @@ define float @select_fcmp_uno(float %x, float %y, float %a, float %b) {
 }
 
 define float @select_fcmp_ueq(float %x, float %y, float %a, float %b) {
-; CHECK-LABEL: select_fcmp_ueq:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    fcmp s0, s1
-; CHECK-NEXT:    fcsel s0, s2, s3, eq
-; CHECK-NEXT:    fcsel s0, s2, s0, vs
-; CHECK-NEXT:    ret
+; CHECK-FASTISEL-LABEL: select_fcmp_ueq:
+; CHECK-FASTISEL:       ; %bb.0:
+; CHECK-FASTISEL-NEXT:    fcmp s0, s1
+; CHECK-FASTISEL-NEXT:    fcsel s0, s2, s3, eq
+; CHECK-FASTISEL-NEXT:    fcsel s0, s2, s0, vs
+; CHECK-FASTISEL-NEXT:    ret
 ;
+; CHECK-GISEL-LABEL: select_fcmp_ueq:
+; CHECK-GISEL:       ; %bb.0:
+; CHECK-GISEL-NEXT:    fcmp s0, s1
+; CHECK-GISEL-NEXT:    cset w8, eq
+; CHECK-GISEL-NEXT:    cset w9, vs
+; CHECK-GISEL-NEXT:    orr w8, w8, w9
+; CHECK-GISEL-NEXT:    tst w8, #0x1
+; CHECK-GISEL-NEXT:    fcsel s0, s2, s3, ne
+; CHECK-GISEL-NEXT:    ret
 ; GISEL-LABEL: select_fcmp_ueq:
 ; GISEL:       ; %bb.0:
 ; GISEL-NEXT:    fcmp s0, s1
@@ -243,7 +340,6 @@ define float @select_fcmp_ugt(float %x, float %y, float %a, float %b) {
 ; CHECK-NEXT:    fcmp s0, s1
 ; CHECK-NEXT:    fcsel s0, s2, s3, hi
 ; CHECK-NEXT:    ret
-;
 ; GISEL-LABEL: select_fcmp_ugt:
 ; GISEL:       ; %bb.0:
 ; GISEL-NEXT:    fcmp s0, s1
@@ -260,7 +356,6 @@ define float @select_fcmp_uge(float %x, float %y, float %a, float %b) {
 ; CHECK-NEXT:    fcmp s0, s1
 ; CHECK-NEXT:    fcsel s0, s2, s3, pl
 ; CHECK-NEXT:    ret
-;
 ; GISEL-LABEL: select_fcmp_uge:
 ; GISEL:       ; %bb.0:
 ; GISEL-NEXT:    fcmp s0, s1
@@ -277,7 +372,6 @@ define float @select_fcmp_ult(float %x, float %y, float %a, float %b) {
 ; CHECK-NEXT:    fcmp s0, s1
 ; CHECK-NEXT:    fcsel s0, s2, s3, lt
 ; CHECK-NEXT:    ret
-;
 ; GISEL-LABEL: select_fcmp_ult:
 ; GISEL:       ; %bb.0:
 ; GISEL-NEXT:    fcmp s0, s1
@@ -295,7 +389,6 @@ define float @select_fcmp_ule(float %x, float %y, float %a, float %b) {
 ; CHECK-NEXT:    fcmp s0, s1
 ; CHECK-NEXT:    fcsel s0, s2, s3, le
 ; CHECK-NEXT:    ret
-;
 ; GISEL-LABEL: select_fcmp_ule:
 ; GISEL:       ; %bb.0:
 ; GISEL-NEXT:    fcmp s0, s1
@@ -312,7 +405,6 @@ define float @select_fcmp_une(float %x, float %y, float %a, float %b) {
 ; CHECK-NEXT:    fcmp s0, s1
 ; CHECK-NEXT:    fcsel s0, s2, s3, ne
 ; CHECK-NEXT:    ret
-;
 ; GISEL-LABEL: select_fcmp_une:
 ; GISEL:       ; %bb.0:
 ; GISEL-NEXT:    fcmp s0, s1
@@ -324,11 +416,20 @@ define float @select_fcmp_une(float %x, float %y, float %a, float %b) {
 }
 
 define float @select_fcmp_true(float %x, float %a, float %b) {
-; CHECK-LABEL: select_fcmp_true:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    fmov s0, s1
-; CHECK-NEXT:    ret
+; CHECK-FASTISEL-LABEL: select_fcmp_true:
+; CHECK-FASTISEL:       ; %bb.0:
+; CHECK-FASTISEL-NEXT:    fmov s0, s1
+; CHECK-FASTISEL-NEXT:    ret
 ;
+; CHECK-GISEL-LABEL: select_fcmp_true:
+; CHECK-GISEL:       ; %bb.0:
+; CHECK-GISEL-NEXT:    fcmp s0, s0
+; CHECK-GISEL-NEXT:    cset w8, eq
+; CHECK-GISEL-NEXT:    cset w9, vs
+; CHECK-GISEL-NEXT:    orr w8, w8, w9
+; CHECK-GISEL-NEXT:    tst w8, #0x1
+; CHECK-GISEL-NEXT:    fcsel s0, s1, s2, ne
+; CHECK-GISEL-NEXT:    ret
 ; GISEL-LABEL: select_fcmp_true:
 ; GISEL:       ; %bb.0:
 ; GISEL-NEXT:    fcmp s0, s0
@@ -349,7 +450,6 @@ define float @select_icmp_eq(i32 %x, i32 %y, float %a, float %b) {
 ; CHECK-NEXT:    cmp w0, w1
 ; CHECK-NEXT:    fcsel s0, s0, s1, eq
 ; CHECK-NEXT:    ret
-;
 ; GISEL-LABEL: select_icmp_eq:
 ; GISEL:       ; %bb.0:
 ; GISEL-NEXT:    cmp w0, w1
@@ -366,7 +466,6 @@ define float @select_icmp_ne(i32 %x, i32 %y, float %a, float %b) {
 ; CHECK-NEXT:    cmp w0, w1
 ; CHECK-NEXT:    fcsel s0, s0, s1, ne
 ; CHECK-NEXT:    ret
-;
 ; GISEL-LABEL: select_icmp_ne:
 ; GISEL:       ; %bb.0:
 ; GISEL-NEXT:    cmp w0, w1
@@ -383,7 +482,6 @@ define float @select_icmp_ugt(i32 %x, i32 %y, float %a, float %b) {
 ; CHECK-NEXT:    cmp w0, w1
 ; CHECK-NEXT:    fcsel s0, s0, s1, hi
 ; CHECK-NEXT:    ret
-;
 ; GISEL-LABEL: select_icmp_ugt:
 ; GISEL:       ; %bb.0:
 ; GISEL-NEXT:    cmp w0, w1
@@ -400,7 +498,6 @@ define float @select_icmp_uge(i32 %x, i32 %y, float %a, float %b) {
 ; CHECK-NEXT:    cmp w0, w1
 ; CHECK-NEXT:    fcsel s0, s0, s1, hs
 ; CHECK-NEXT:    ret
-;
 ; GISEL-LABEL: select_icmp_uge:
 ; GISEL:       ; %bb.0:
 ; GISEL-NEXT:    cmp w0, w1
@@ -417,7 +514,6 @@ define float @select_icmp_ult(i32 %x, i32 %y, float %a, float %b) {
 ; CHECK-NEXT:    cmp w0, w1
 ; CHECK-NEXT:    fcsel s0, s0, s1, lo
 ; CHECK-NEXT:    ret
-;
 ; GISEL-LABEL: select_icmp_ult:
 ; GISEL:       ; %bb.0:
 ; GISEL-NEXT:    cmp w0, w1
@@ -434,7 +530,6 @@ define float @select_icmp_ule(i32 %x, i32 %y, float %a, float %b) {
 ; CHECK-NEXT:    cmp w0, w1
 ; CHECK-NEXT:    fcsel s0, s0, s1, ls
 ; CHECK-NEXT:    ret
-;
 ; GISEL-LABEL: select_icmp_ule:
 ; GISEL:       ; %bb.0:
 ; GISEL-NEXT:    cmp w0, w1
@@ -451,7 +546,6 @@ define float @select_icmp_sgt(i32 %x, i32 %y, float %a, float %b) {
 ; CHECK-NEXT:    cmp w0, w1
 ; CHECK-NEXT:    fcsel s0, s0, s1, gt
 ; CHECK-NEXT:    ret
-;
 ; GISEL-LABEL: select_icmp_sgt:
 ; GISEL:       ; %bb.0:
 ; GISEL-NEXT:    cmp w0, w1
@@ -468,7 +562,6 @@ define float @select_icmp_sge(i32 %x, i32 %y, float %a, float %b) {
 ; CHECK-NEXT:    cmp w0, w1
 ; CHECK-NEXT:    fcsel s0, s0, s1, ge
 ; CHECK-NEXT:    ret
-;
 ; GISEL-LABEL: select_icmp_sge:
 ; GISEL:       ; %bb.0:
 ; GISEL-NEXT:    cmp w0, w1
@@ -485,7 +578,6 @@ define float @select_icmp_slt(i32 %x, i32 %y, float %a, float %b) {
 ; CHECK-NEXT:    cmp w0, w1
 ; CHECK-NEXT:    fcsel s0, s0, s1, lt
 ; CHECK-NEXT:    ret
-;
 ; GISEL-LABEL: select_icmp_slt:
 ; GISEL:       ; %bb.0:
 ; GISEL-NEXT:    cmp w0, w1
@@ -502,7 +594,6 @@ define float @select_icmp_sle(i32 %x, i32 %y, float %a, float %b) {
 ; CHECK-NEXT:    cmp w0, w1
 ; CHECK-NEXT:    fcsel s0, s0, s1, le
 ; CHECK-NEXT:    ret
-;
 ; GISEL-LABEL: select_icmp_sle:
 ; GISEL:       ; %bb.0:
 ; GISEL-NEXT:    cmp w0, w1
@@ -520,6 +611,17 @@ define zeroext i1 @select_opt1(i1 zeroext %c, i1 zeroext %a) {
 ; GISEL-NEXT:    orr w8, w0, w1
 ; GISEL-NEXT:    and w0, w8, #0x1
 ; GISEL-NEXT:    ret
+; CHECK-FASTISEL-LABEL: select_opt1:
+; CHECK-FASTISEL:       ; %bb.0:
+; CHECK-FASTISEL-NEXT:    orr w8, w0, w1
+; CHECK-FASTISEL-NEXT:    and w0, w8, #0x1
+; CHECK-FASTISEL-NEXT:    ret
+;
+; CHECK-GISEL-LABEL: select_opt1:
+; CHECK-GISEL:       ; %bb.0:
+; CHECK-GISEL-NEXT:    orr w8, w0, w1
+; CHECK-GISEL-NEXT:    and w0, w8, #0x1
+; CHECK-GISEL-NEXT:    ret
   %1 = select i1 %c, i1 true, i1 %a
   ret i1 %1
 }
@@ -531,6 +633,19 @@ define zeroext i1 @select_opt2(i1 zeroext %c, i1 zeroext %a) {
 ; GISEL-NEXT:    orr w8, w8, w1
 ; GISEL-NEXT:    and w0, w8, #0x1
 ; GISEL-NEXT:    ret
+; CHECK-FASTISEL-LABEL: select_opt2:
+; CHECK-FASTISEL:       ; %bb.0:
+; CHECK-FASTISEL-NEXT:    eor w8, w0, #0x1
+; CHECK-FASTISEL-NEXT:    orr w8, w8, w1
+; CHECK-FASTISEL-NEXT:    and w0, w8, #0x1
+; CHECK-FASTISEL-NEXT:    ret
+;
+; CHECK-GISEL-LABEL: select_opt2:
+; CHECK-GISEL:       ; %bb.0:
+; CHECK-GISEL-NEXT:    eor w8, w0, #0x1
+; CHECK-GISEL-NEXT:    orr w8, w8, w1
+; CHECK-GISEL-NEXT:    and w0, w8, #0x1
+; CHECK-GISEL-NEXT:    ret
   %1 = select i1 %c, i1 %a, i1 true
   ret i1 %1
 }
@@ -541,6 +656,17 @@ define zeroext i1 @select_opt3(i1 zeroext %c, i1 zeroext %a) {
 ; GISEL-NEXT:    eor w8, w0, #0x1
 ; GISEL-NEXT:    and w0, w8, w1
 ; GISEL-NEXT:    ret
+; CHECK-FASTISEL-LABEL: select_opt3:
+; CHECK-FASTISEL:       ; %bb.0:
+; CHECK-FASTISEL-NEXT:    bic w8, w1, w0
+; CHECK-FASTISEL-NEXT:    and w0, w8, #0x1
+; CHECK-FASTISEL-NEXT:    ret
+;
+; CHECK-GISEL-LABEL: select_opt3:
+; CHECK-GISEL:       ; %bb.0:
+; CHECK-GISEL-NEXT:    eor w8, w0, #0x1
+; CHECK-GISEL-NEXT:    and w0, w8, w1
+; CHECK-GISEL-NEXT:    ret
   %1 = select i1 %c, i1 false, i1 %a
   ret i1 %1
 }
@@ -550,6 +676,16 @@ define zeroext i1 @select_opt4(i1 zeroext %c, i1 zeroext %a) {
 ; GISEL:       ; %bb.0:
 ; GISEL-NEXT:    and w0, w0, w1
 ; GISEL-NEXT:    ret
+; CHECK-FASTISEL-LABEL: select_opt4:
+; CHECK-FASTISEL:       ; %bb.0:
+; CHECK-FASTISEL-NEXT:    and w8, w0, w1
+; CHECK-FASTISEL-NEXT:    and w0, w8, #0x1
+; CHECK-FASTISEL-NEXT:    ret
+;
+; CHECK-GISEL-LABEL: select_opt4:
+; CHECK-GISEL:       ; %bb.0:
+; CHECK-GISEL-NEXT:    and w0, w0, w1
+; CHECK-GISEL-NEXT:    ret
   %1 = select i1 %c, i1 %a, i1 false
   ret i1 %1
 }



More information about the llvm-commits mailing list