[llvm] 49c3565 - [AArch64][GlobalISel] Swap compare operands when it may be profitable

Jessica Paquette via llvm-commits <llvm-commits at lists.llvm.org>
Fri Apr 9 15:47:13 PDT 2021


Author: Jessica Paquette
Date: 2021-04-09T15:46:48-07:00
New Revision: 49c3565b9ba738d0305ab97c2771fa37f2023726

URL: https://github.com/llvm/llvm-project/commit/49c3565b9ba738d0305ab97c2771fa37f2023726
DIFF: https://github.com/llvm/llvm-project/commit/49c3565b9ba738d0305ab97c2771fa37f2023726.diff

LOG: [AArch64][GlobalISel] Swap compare operands when it may be profitable

This adds support for swapping comparison operands when doing so may introduce
new folding opportunities.

This is roughly the same as the code added to AArch64ISelLowering in
162435e7b5e026b9f988c730bb6527683f6aa853.

For an example of a testcase which exercises this, see
llvm/test/CodeGen/AArch64/swap-compare-operands.ll

(Godbolt for that testcase: https://godbolt.org/z/43WEMb)

The idea behind this is that we can sometimes fold away a shift or extend
feeding a compare by swapping the compare's operands.

For example, given this compare:

```
lsl x8, x0, #1
cmp x8, x1
cset w0, lt
```

The following is equivalent, because swapping the operands also swaps the
condition (lt becomes gt), and the shift can now fold into the cmp:

```
cmp x1, x0, lsl #1
cset w0, gt
```
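
A note on why the condition code changes: swapping the operands of a compare
needs the swapped predicate (lt becomes gt), not the inverted one; the patch
uses CmpInst::getSwappedPredicate for this. Here is a small standalone sketch
of that invariant in plain C++ (an illustration only, not LLVM code):

```
// Standalone illustration (not LLVM code): a compare with swapped operands is
// only equivalent if the predicate is swapped too: slt <-> sgt, sle <-> sge,
// while eq/ne stay as they are.
#include <cassert>
#include <cstdint>

enum class Pred { SLT, SGT, SLE, SGE, EQ, NE };

static Pred swappedPredicate(Pred P) {
  switch (P) {
  case Pred::SLT: return Pred::SGT;
  case Pred::SGT: return Pred::SLT;
  case Pred::SLE: return Pred::SGE;
  case Pred::SGE: return Pred::SLE;
  default:        return P; // eq/ne are symmetric
  }
}

static bool cmp(Pred P, int64_t A, int64_t B) {
  switch (P) {
  case Pred::SLT: return A < B;
  case Pred::SGT: return A > B;
  case Pred::SLE: return A <= B;
  case Pred::SGE: return A >= B;
  case Pred::EQ:  return A == B;
  case Pred::NE:  return A != B;
  }
  return false;
}

int main() {
  int64_t X = 3, Y = 5;
  // (x << 1) < y must agree with y > (x << 1), i.e. cmp x1, x0, lsl #1 + gt.
  assert(cmp(Pred::SLT, X << 1, Y) ==
         cmp(swappedPredicate(Pred::SLT), Y, X << 1));
  return 0;
}
```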

Most of the code here is just a reimplementation of what already exists in
AArch64ISelLowering.

(See `getCmpOperandFoldingProfit` and `getAArch64Cmp` for the equivalent code.)
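
Roughly, the profitability heuristic scores each compare operand: a supported
extend is worth 1, an extend feeding a shift by at most 4 is worth 2, an
in-range constant shift is worth 1, and anything else is worth 0; we swap only
when the LHS scores strictly higher than the RHS. Here is a simplified
standalone model of that decision (illustrative names and struct only, not the
actual getCmpOperandFoldingProfit):

```
// Hypothetical standalone model of the swap decision; the struct and helper
// names are illustrative. The real code also requires the operand to have a
// single (non-debug) use before it counts anything.
#include <iostream>

struct Operand {
  bool HasExtend = false; // G_SEXT_INREG, or G_AND with 0xFF/0xFFFF/0xFFFFFFFF
  int ShiftAmt = -1;      // constant shift amount, -1 if there is no shift
  int BitWidth = 64;
};

static int foldingProfit(const Operand &Op) {
  if (Op.HasExtend && Op.ShiftAmt < 0)
    return 1;                              // extend only
  if (Op.HasExtend)
    return Op.ShiftAmt <= 4 ? 2 : 1;       // extend + shift
  if (Op.ShiftAmt >= 0 && Op.ShiftAmt < Op.BitWidth)
    return 1;                              // in-range shift only
  return 0;
}

int main() {
  // The commit-message example: lsl x8, x0, #1; cmp x8, x1.
  Operand LHS{false, 1, 64};  // shifted by 1   -> profit 1
  Operand RHS{false, -1, 64}; // plain register -> profit 0
  bool ShouldSwap = foldingProfit(LHS) > foldingProfit(RHS);
  std::cout << ShouldSwap << '\n'; // prints 1
}
```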

Note that most of the AND cases in the testcase don't actually fold yet. It
seems we're missing selection support for that sort of fold right now; SDAG
happily folds these away (e.g. testSwapCmpWithShiftedZeroExtend8_32 in the
original .ll testcase).
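
For reference, the lowering heuristic only treats a G_AND as an extend when
the mask is 0xFF, 0xFFFF, or 0xFFFFFFFF (i.e. it behaves like uxtb/uxth/uxtw).
A trivial standalone sketch of that mask check (not the LLVM code itself):

```
// Masks that behave like uxtb/uxth/uxtw zero-extends; anything else (e.g. 7)
// is not an extend mask, as the dont_swap_and_lhs_wrong_mask test shows.
#include <cstdint>
#include <iostream>

static bool isSupportedExtendMask(uint64_t Mask) {
  return Mask == 0xFF || Mask == 0xFFFF || Mask == 0xFFFFFFFF;
}

int main() {
  std::cout << isSupportedExtendMask(0xFF) << isSupportedExtendMask(7) << '\n'; // 10
}
```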

Differential Revision: https://reviews.llvm.org/D89422

Added: 
    llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-swap-compare-operands.mir

Modified: 
    llvm/lib/Target/AArch64/AArch64Combine.td
    llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp
    llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.h
    llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
    llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index aa6a3c967b119..07608fc56990d 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -127,7 +127,14 @@ def adjust_icmp_imm : GICombineRule <
   (apply [{ applyAdjustICmpImmAndPred(*${root}, ${matchinfo}, B, Observer); }])
 >;
 
-def icmp_lowering : GICombineGroup<[adjust_icmp_imm]>;
+def swap_icmp_operands : GICombineRule <
+  (defs root:$root),
+  (match (wip_match_opcode G_ICMP):$root,
+          [{ return trySwapICmpOperands(*${root}, MRI); }]),
+  (apply [{ applySwapICmpOperands(*${root}, Observer); }])
+>;
+
+def icmp_lowering : GICombineGroup<[adjust_icmp_imm, swap_icmp_operands]>;
 
 def extractvecelt_pairwise_add_matchdata : GIDefMatchData<"std::tuple<unsigned, LLT, Register>">;
 def extractvecelt_pairwise_add : GICombineRule<

diff  --git a/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp b/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp
index 7c543028af9f5..a1392ccb59e6c 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp
@@ -29,10 +29,31 @@ AArch64GISelUtils::getAArch64VectorSplat(const MachineInstr &MI,
   return RegOrConstant(Src);
 }
 
-Optional<int64_t> AArch64GISelUtils::getAArch64VectorSplatScalar(
-    const MachineInstr &MI, const MachineRegisterInfo &MRI) {
+Optional<int64_t>
+AArch64GISelUtils::getAArch64VectorSplatScalar(const MachineInstr &MI,
+                                               const MachineRegisterInfo &MRI) {
   auto Splat = getAArch64VectorSplat(MI, MRI);
   if (!Splat || Splat->isReg())
     return None;
   return Splat->getCst();
 }
+
+bool AArch64GISelUtils::isCMN(const MachineInstr *MaybeSub,
+                              const CmpInst::Predicate &Pred,
+                              const MachineRegisterInfo &MRI) {
+  // Match:
+  //
+  // %sub = G_SUB 0, %y
+  // %cmp = G_ICMP eq/ne, %sub, %z
+  //
+  // Or
+  //
+  // %sub = G_SUB 0, %y
+  // %cmp = G_ICMP eq/ne, %z, %sub
+  if (!MaybeSub || MaybeSub->getOpcode() != TargetOpcode::G_SUB ||
+      !CmpInst::isEquality(Pred))
+    return false;
+  auto MaybeZero =
+      getConstantVRegValWithLookThrough(MaybeSub->getOperand(1).getReg(), MRI);
+  return MaybeZero && MaybeZero->Value.getZExtValue() == 0;
+}
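
A quick note on the eq/ne restriction above: cmp (0 - y), z and cmn y, z agree
on the Z flag but not on the other NZCV flags, so only equality predicates are
safe to rewrite. The identity behind the Z-flag case can be checked standalone
(plain C++ for illustration, not part of the patch):

```
// (0 - y) == z holds exactly when y + z == 0 under two's-complement
// wraparound, so an eq/ne compare against a negation can become a CMN.
// Ordered predicates are excluded because the other NZCV flags of
// "subs (0 - y), z" and "adds y, z" do not match in general.
#include <cassert>
#include <cstdint>
#include <initializer_list>

int main() {
  for (uint64_t Y : {0ull, 7ull, ~6ull, 1ull << 63, ~0ull})
    for (uint64_t Z : {0ull, 7ull, ~6ull, 1ull << 63, ~0ull}) {
      bool CmpEq = (0 - Y) == Z; // G_ICMP eq (G_SUB 0, y), z
      bool CmnEq = (Y + Z) == 0; // what CMN's Z flag tests
      assert(CmpEq == CmnEq);
    }
  return 0;
}
```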

diff  --git a/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.h b/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.h
index b1e575d4e4d6c..142d999ef05a2 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.h
+++ b/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.h
@@ -15,9 +15,12 @@
 #include "llvm/ADT/Optional.h"
 #include "llvm/CodeGen/GlobalISel/Utils.h"
 #include "llvm/CodeGen/Register.h"
+#include "MCTargetDesc/AArch64AddressingModes.h"
+#include "llvm/IR/InstrTypes.h"
 #include <cstdint>
 
 namespace llvm {
+
 namespace AArch64GISelUtils {
 
 /// \returns true if \p C is a legal immediate operand for an arithmetic
@@ -36,6 +39,11 @@ Optional<RegOrConstant> getAArch64VectorSplat(const MachineInstr &MI,
 Optional<int64_t> getAArch64VectorSplatScalar(const MachineInstr &MI,
                                               const MachineRegisterInfo &MRI);
 
+/// \returns true if \p MaybeSub and \p Pred are part of a CMN tree for an
+/// integer compare.
+bool isCMN(const MachineInstr *MaybeSub, const CmpInst::Predicate &Pred,
+           const MachineRegisterInfo &MRI);
+
 } // namespace AArch64GISelUtils
 } // namespace llvm
 

diff  --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 50b352b5eaf94..7160432884fee 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -18,6 +18,7 @@
 #include "AArch64RegisterInfo.h"
 #include "AArch64Subtarget.h"
 #include "AArch64TargetMachine.h"
+#include "AArch64GlobalISelUtils.h"
 #include "MCTargetDesc/AArch64AddressingModes.h"
 #include "MCTargetDesc/AArch64MCTargetDesc.h"
 #include "llvm/ADT/Optional.h"
@@ -4577,37 +4578,10 @@ MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
   //
   // cmn z, y
 
-  // Helper lambda to detect the subtract followed by the compare.
-  // Takes in the def of the LHS or RHS, and checks if it's a subtract from 0.
-  auto IsCMN = [&](MachineInstr *DefMI, const AArch64CC::CondCode &CC) {
-    if (!DefMI || DefMI->getOpcode() != TargetOpcode::G_SUB)
-      return false;
-
-    // Need to make sure NZCV is the same at the end of the transformation.
-    if (CC != AArch64CC::EQ && CC != AArch64CC::NE)
-      return false;
-
-    // We want to match against SUBs.
-    if (DefMI->getOpcode() != TargetOpcode::G_SUB)
-      return false;
-
-    // Make sure that we're getting
-    // x = G_SUB 0, y
-    auto ValAndVReg =
-        getConstantVRegValWithLookThrough(DefMI->getOperand(1).getReg(), MRI);
-    if (!ValAndVReg || ValAndVReg->Value != 0)
-      return false;
-
-    // This can safely be represented as a CMN.
-    return true;
-  };
-
   // Check if the RHS or LHS of the G_ICMP is defined by a SUB
   MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI);
   MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI);
-  CmpInst::Predicate P = (CmpInst::Predicate)Predicate.getPredicate();
-  const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(P);
-
+  auto P = static_cast<CmpInst::Predicate>(Predicate.getPredicate());
   // Given this:
   //
   // x = G_SUB 0, y
@@ -4616,7 +4590,7 @@ MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
   // Produce this:
   //
   // cmn y, z
-  if (IsCMN(LHSDef, CC))
+  if (isCMN(LHSDef, P, MRI))
     return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder);
 
   // Same idea here, but with the RHS of the compare instead:
@@ -4629,7 +4603,7 @@ MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
   // Produce this:
   //
   // cmn z, y
-  if (IsCMN(RHSDef, CC))
+  if (isCMN(RHSDef, P, MRI))
     return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);
 
   // Given this:

diff  --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
index 130416a04c6d3..558cd239f6f76 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
@@ -735,6 +735,113 @@ static bool applyBuildVectorToDup(MachineInstr &MI, MachineRegisterInfo &MRI,
   return true;
 }
 
+/// \returns how many instructions would be saved by folding a G_ICMP's shift
+/// and/or extension operations.
+static unsigned getCmpOperandFoldingProfit(Register CmpOp,
+                                           const MachineRegisterInfo &MRI) {
+  // No instructions to save if there's more than one use or no uses.
+  if (!MRI.hasOneNonDBGUse(CmpOp))
+    return 0;
+
+  // FIXME: This is duplicated with the selector. (See: selectShiftedRegister)
+  auto IsSupportedExtend = [&](const MachineInstr &MI) {
+    if (MI.getOpcode() == TargetOpcode::G_SEXT_INREG)
+      return true;
+    if (MI.getOpcode() != TargetOpcode::G_AND)
+      return false;
+    auto ValAndVReg =
+        getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
+    if (!ValAndVReg)
+      return false;
+    uint64_t Mask = ValAndVReg->Value.getZExtValue();
+    return (Mask == 0xFF || Mask == 0xFFFF || Mask == 0xFFFFFFFF);
+  };
+
+  MachineInstr *Def = getDefIgnoringCopies(CmpOp, MRI);
+  if (IsSupportedExtend(*Def))
+    return 1;
+
+  unsigned Opc = Def->getOpcode();
+  if (Opc != TargetOpcode::G_SHL && Opc != TargetOpcode::G_ASHR &&
+      Opc != TargetOpcode::G_LSHR)
+    return 0;
+
+  auto MaybeShiftAmt =
+      getConstantVRegValWithLookThrough(Def->getOperand(2).getReg(), MRI);
+  if (!MaybeShiftAmt)
+    return 0;
+  uint64_t ShiftAmt = MaybeShiftAmt->Value.getZExtValue();
+  MachineInstr *ShiftLHS =
+      getDefIgnoringCopies(Def->getOperand(1).getReg(), MRI);
+
+  // Check if we can fold an extend and a shift.
+  // FIXME: This is duplicated with the selector. (See:
+  // selectArithExtendedRegister)
+  if (IsSupportedExtend(*ShiftLHS))
+    return (ShiftAmt <= 4) ? 2 : 1;
+
+  LLT Ty = MRI.getType(Def->getOperand(0).getReg());
+  if (Ty.isVector())
+    return 0;
+  unsigned ShiftSize = Ty.getSizeInBits();
+  if ((ShiftSize == 32 && ShiftAmt <= 31) ||
+      (ShiftSize == 64 && ShiftAmt <= 63))
+    return 1;
+  return 0;
+}
+
+/// \returns true if it would be profitable to swap the LHS and RHS of a G_ICMP
+/// instruction \p MI.
+static bool trySwapICmpOperands(MachineInstr &MI,
+                                 const MachineRegisterInfo &MRI) {
+  assert(MI.getOpcode() == TargetOpcode::G_ICMP);
+  // Swap the operands if it would introduce a profitable folding opportunity.
+  // (e.g. a shift + extend).
+  //
+  //  For example:
+  //    lsl     w13, w11, #1
+  //    cmp     w13, w12
+  // can be turned into:
+  //    cmp     w12, w11, lsl #1
+
+  // Don't swap if there's a constant on the RHS, because we know we can fold
+  // that.
+  Register RHS = MI.getOperand(3).getReg();
+  auto RHSCst = getConstantVRegValWithLookThrough(RHS, MRI);
+  if (RHSCst && isLegalArithImmed(RHSCst->Value.getSExtValue()))
+    return false;
+
+  Register LHS = MI.getOperand(2).getReg();
+  auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
+  auto GetRegForProfit = [&](Register Reg) {
+    MachineInstr *Def = getDefIgnoringCopies(Reg, MRI);
+    return isCMN(Def, Pred, MRI) ? Def->getOperand(2).getReg() : Reg;
+  };
+
+  // Don't have a constant on the RHS. If we swap the LHS and RHS of the
+  // compare, would we be able to fold more instructions?
+  Register TheLHS = GetRegForProfit(LHS);
+  Register TheRHS = GetRegForProfit(RHS);
+
+  // If the LHS is more likely to give us a folding opportunity, then swap the
+  // LHS and RHS.
+  return (getCmpOperandFoldingProfit(TheLHS, MRI) >
+          getCmpOperandFoldingProfit(TheRHS, MRI));
+}
+
+static bool applySwapICmpOperands(MachineInstr &MI,
+                                   GISelChangeObserver &Observer) {
+  auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
+  Register LHS = MI.getOperand(2).getReg();
+  Register RHS = MI.getOperand(3).getReg();
+  Observer.changedInstr(MI);
+  MI.getOperand(1).setPredicate(CmpInst::getSwappedPredicate(Pred));
+  MI.getOperand(2).setReg(RHS);
+  MI.getOperand(3).setReg(LHS);
+  Observer.changedInstr(MI);
+  return true;
+}
+
 #define AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_DEPS
 #include "AArch64GenPostLegalizeGILowering.inc"
 #undef AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_DEPS

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-swap-compare-operands.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-swap-compare-operands.mir
new file mode 100644
index 0000000000000..e9a2fcabd6298
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-swap-compare-operands.mir
@@ -0,0 +1,737 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64 -run-pass=aarch64-postlegalizer-lowering -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=LOWER
+# RUN: llc -mtriple=aarch64 -global-isel -start-before=aarch64-postlegalizer-lowering -stop-after=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=SELECT
+#
+# Check that we swap the order of operands on comparisons when it is likely
+# to introduce a folding opportunity.
+#
+# The condition code for the compare should be changed when appropriate.
+#
+# TODO: emitBinOp doesn't know about selectArithExtendedRegister, so some of
+# these cases don't hit in selection yet.
+
+...
+---
+name:            swap_sextinreg_lhs
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $x0, $x1
+
+    ; LOWER-LABEL: name: swap_sextinreg_lhs
+    ; LOWER: liveins: $x0, $x1
+    ; LOWER: %reg:_(s64) = COPY $x0
+    ; LOWER: %cmp_lhs:_(s64) = G_SEXT_INREG %reg, 8
+    ; LOWER: %cmp_rhs:_(s64) = COPY $x1
+    ; LOWER: %cmp:_(s32) = G_ICMP intpred(sle), %cmp_rhs(s64), %cmp_lhs
+    ; LOWER: $w0 = COPY %cmp(s32)
+    ; LOWER: RET_ReallyLR implicit $w0
+    ; SELECT-LABEL: name: swap_sextinreg_lhs
+    ; SELECT: liveins: $x0, $x1
+    ; SELECT: %reg:gpr64all = COPY $x0
+    ; SELECT: [[COPY:%[0-9]+]]:gpr32all = COPY %reg.sub_32
+    ; SELECT: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]]
+    ; SELECT: %cmp_rhs:gpr64sp = COPY $x1
+    ; SELECT: [[SUBSXrx:%[0-9]+]]:gpr64 = SUBSXrx %cmp_rhs, [[COPY1]], 32, implicit-def $nzcv
+    ; SELECT: %cmp:gpr32 = CSINCWr $wzr, $wzr, 12, implicit $nzcv
+    ; SELECT: $w0 = COPY %cmp
+    ; SELECT: RET_ReallyLR implicit $w0
+    %reg:_(s64) = COPY $x0
+    %cmp_lhs:_(s64) = G_SEXT_INREG %reg, 8
+    %cmp_rhs:_(s64) = COPY $x1
+    %cmp:_(s32) = G_ICMP intpred(sge), %cmp_lhs(s64), %cmp_rhs
+    $w0 = COPY %cmp(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            dont_swap_more_than_one_use
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $x0, $x1
+
+    ; The LHS of the compare is used in an add, and a second compare. Don't
+    ; swap, since we don't gain any folding opportunities here.
+
+    ; LOWER-LABEL: name: dont_swap_more_than_one_use
+    ; LOWER: liveins: $x0, $x1
+    ; LOWER: %reg0:_(s64) = COPY $x0
+    ; LOWER: %cmp_lhs:_(s64) = G_SEXT_INREG %reg0, 8
+    ; LOWER: %add:_(s64) = G_ADD %cmp_lhs, %reg0
+    ; LOWER: %cmp2:_(s32) = G_ICMP intpred(sge), %cmp_lhs(s64), %add
+    ; LOWER: $w0 = COPY %cmp2(s32)
+    ; LOWER: RET_ReallyLR implicit $w0
+    ; SELECT-LABEL: name: dont_swap_more_than_one_use
+    ; SELECT: liveins: $x0, $x1
+    ; SELECT: %reg0:gpr64 = COPY $x0
+    ; SELECT: %cmp_lhs:gpr64 = SBFMXri %reg0, 0, 7
+    ; SELECT: %add:gpr64 = ADDXrr %cmp_lhs, %reg0
+    ; SELECT: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr %cmp_lhs, %add, implicit-def $nzcv
+    ; SELECT: %cmp2:gpr32 = CSINCWr $wzr, $wzr, 11, implicit $nzcv
+    ; SELECT: $w0 = COPY %cmp2
+    ; SELECT: RET_ReallyLR implicit $w0
+    %reg0:_(s64) = COPY $x0
+    %cmp_lhs:_(s64) = G_SEXT_INREG %reg0, 8
+    %reg1:_(s64) = COPY $x1
+    %cmp1:_(s32) = G_ICMP intpred(sge), %cmp_lhs(s64), %reg1
+
+    %add:_(s64) = G_ADD %cmp_lhs(s64), %reg0
+    %cmp2:_(s32) = G_ICMP intpred(sge), %cmp_lhs(s64), %add
+
+    $w0 = COPY %cmp2(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            dont_swap_legal_arith_immed_on_rhs
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $x0, $x1
+    ; Arithmetic immediates can be folded into compares. If we have one, then
+    ; don't bother changing anything.
+
+    ; LOWER-LABEL: name: dont_swap_legal_arith_immed_on_rhs
+    ; LOWER: liveins: $x0, $x1
+    ; LOWER: %reg:_(s64) = COPY $x0
+    ; LOWER: %cmp_lhs:_(s64) = G_SEXT_INREG %reg, 8
+    ; LOWER: %cmp_rhs:_(s64) = G_CONSTANT i64 12
+    ; LOWER: %cmp:_(s32) = G_ICMP intpred(sge), %cmp_lhs(s64), %cmp_rhs
+    ; LOWER: $w0 = COPY %cmp(s32)
+    ; LOWER: RET_ReallyLR implicit $w0
+    ; SELECT-LABEL: name: dont_swap_legal_arith_immed_on_rhs
+    ; SELECT: liveins: $x0, $x1
+    ; SELECT: %reg:gpr64 = COPY $x0
+    ; SELECT: %cmp_lhs:gpr64common = SBFMXri %reg, 0, 7
+    ; SELECT: [[SUBSXri:%[0-9]+]]:gpr64 = SUBSXri %cmp_lhs, 12, 0, implicit-def $nzcv
+    ; SELECT: %cmp:gpr32 = CSINCWr $wzr, $wzr, 11, implicit $nzcv
+    ; SELECT: $w0 = COPY %cmp
+    ; SELECT: RET_ReallyLR implicit $w0
+    %reg:_(s64) = COPY $x0
+    %cmp_lhs:_(s64) = G_SEXT_INREG %reg, 8
+    %cmp_rhs:_(s64) = G_CONSTANT i64 12
+    %cmp:_(s32) = G_ICMP intpred(sge), %cmp_lhs(s64), %cmp_rhs
+    $w0 = COPY %cmp(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            swap_non_arith_immed_on_rhs
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $x0, $x1
+    ; If we have a non-arithmetic immediate on the rhs, then we can swap to get
+    ; a guaranteed folding opportunity.
+
+    ; LOWER-LABEL: name: swap_non_arith_immed_on_rhs
+    ; LOWER: liveins: $x0, $x1
+    ; LOWER: %reg:_(s64) = COPY $x0
+    ; LOWER: %cmp_lhs:_(s64) = G_SEXT_INREG %reg, 8
+    ; LOWER: %cmp_rhs:_(s64) = G_CONSTANT i64 1234567
+    ; LOWER: %cmp:_(s32) = G_ICMP intpred(sle), %cmp_rhs(s64), %cmp_lhs
+    ; LOWER: $w0 = COPY %cmp(s32)
+    ; LOWER: RET_ReallyLR implicit $w0
+    ; SELECT-LABEL: name: swap_non_arith_immed_on_rhs
+    ; SELECT: liveins: $x0, $x1
+    ; SELECT: %reg:gpr64all = COPY $x0
+    ; SELECT: [[COPY:%[0-9]+]]:gpr32all = COPY %reg.sub_32
+    ; SELECT: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]]
+    ; SELECT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1234567
+    ; SELECT: %cmp_rhs:gpr64sp = SUBREG_TO_REG 0, [[MOVi32imm]], %subreg.sub_32
+    ; SELECT: [[SUBSXrx:%[0-9]+]]:gpr64 = SUBSXrx %cmp_rhs, [[COPY1]], 32, implicit-def $nzcv
+    ; SELECT: %cmp:gpr32 = CSINCWr $wzr, $wzr, 12, implicit $nzcv
+    ; SELECT: $w0 = COPY %cmp
+    ; SELECT: RET_ReallyLR implicit $w0
+    %reg:_(s64) = COPY $x0
+    %cmp_lhs:_(s64) = G_SEXT_INREG %reg, 8
+    %cmp_rhs:_(s64) = G_CONSTANT i64 1234567
+    %cmp:_(s32) = G_ICMP intpred(sge), %cmp_lhs(s64), %cmp_rhs
+    $w0 = COPY %cmp(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            swap_and_lhs_0xFF
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $x0, $x1
+    ; LOWER-LABEL: name: swap_and_lhs_0xFF
+    ; LOWER: liveins: $x0, $x1
+    ; LOWER: %cmp_rhs:_(s64) = COPY $x1
+    ; LOWER: %and_lhs:_(s64) = COPY $x0
+    ; LOWER: %cst:_(s64) = G_CONSTANT i64 255
+    ; LOWER: %cmp_lhs:_(s64) = G_AND %and_lhs, %cst
+    ; LOWER: %cmp:_(s32) = G_ICMP intpred(sle), %cmp_rhs(s64), %cmp_lhs
+    ; LOWER: $w0 = COPY %cmp(s32)
+    ; LOWER: RET_ReallyLR implicit $w0
+    ; SELECT-LABEL: name: swap_and_lhs_0xFF
+    ; SELECT: liveins: $x0, $x1
+    ; SELECT: %cmp_rhs:gpr64sp = COPY $x1
+    ; SELECT: %and_lhs:gpr64all = COPY $x0
+    ; SELECT: [[COPY:%[0-9]+]]:gpr32all = COPY %and_lhs.sub_32
+    ; SELECT: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]]
+    ; SELECT: [[SUBSXrx:%[0-9]+]]:gpr64 = SUBSXrx %cmp_rhs, [[COPY1]], 0, implicit-def $nzcv
+    ; SELECT: %cmp:gpr32 = CSINCWr $wzr, $wzr, 12, implicit $nzcv
+    ; SELECT: $w0 = COPY %cmp
+    ; SELECT: RET_ReallyLR implicit $w0
+    %cmp_rhs:_(s64) = COPY $x1
+
+    %and_lhs:_(s64) = COPY $x0
+    %cst:_(s64) = G_CONSTANT i64 255
+    %cmp_lhs:_(s64) = G_AND %and_lhs, %cst(s64)
+
+    %cmp:_(s32) = G_ICMP intpred(sge), %cmp_lhs(s64), %cmp_rhs
+    $w0 = COPY %cmp(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            swap_and_lhs_0xFFFF
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $x0, $x1
+    ; LOWER-LABEL: name: swap_and_lhs_0xFFFF
+    ; LOWER: liveins: $x0, $x1
+    ; LOWER: %cmp_rhs:_(s64) = COPY $x1
+    ; LOWER: %cst:_(s64) = G_CONSTANT i64 65535
+    ; LOWER: %and_lhs:_(s64) = COPY $x0
+    ; LOWER: %cmp_lhs:_(s64) = G_AND %and_lhs, %cst
+    ; LOWER: %cmp:_(s32) = G_ICMP intpred(sle), %cmp_rhs(s64), %cmp_lhs
+    ; LOWER: $w0 = COPY %cmp(s32)
+    ; LOWER: RET_ReallyLR implicit $w0
+    ; SELECT-LABEL: name: swap_and_lhs_0xFFFF
+    ; SELECT: liveins: $x0, $x1
+    ; SELECT: %cmp_rhs:gpr64sp = COPY $x1
+    ; SELECT: %and_lhs:gpr64all = COPY $x0
+    ; SELECT: [[COPY:%[0-9]+]]:gpr32all = COPY %and_lhs.sub_32
+    ; SELECT: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]]
+    ; SELECT: [[SUBSXrx:%[0-9]+]]:gpr64 = SUBSXrx %cmp_rhs, [[COPY1]], 8, implicit-def $nzcv
+    ; SELECT: %cmp:gpr32 = CSINCWr $wzr, $wzr, 12, implicit $nzcv
+    ; SELECT: $w0 = COPY %cmp
+    ; SELECT: RET_ReallyLR implicit $w0
+    %cmp_rhs:_(s64) = COPY $x1
+
+    %cst:_(s64) = G_CONSTANT i64 65535
+    %and_lhs:_(s64) = COPY $x0
+    %cmp_lhs:_(s64) = G_AND %and_lhs, %cst(s64)
+
+    %cmp:_(s32) = G_ICMP intpred(sge), %cmp_lhs(s64), %cmp_rhs
+    $w0 = COPY %cmp(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            swap_and_lhs_0xFFFFFFFF
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $x0, $x1
+    ; LOWER-LABEL: name: swap_and_lhs_0xFFFFFFFF
+    ; LOWER: liveins: $x0, $x1
+    ; LOWER: %cmp_rhs:_(s64) = COPY $x1
+    ; LOWER: %and_lhs:_(s64) = COPY $x0
+    ; LOWER: %cst:_(s64) = G_CONSTANT i64 4294967295
+    ; LOWER: %cmp_lhs:_(s64) = G_AND %and_lhs, %cst
+    ; LOWER: %cmp:_(s32) = G_ICMP intpred(sle), %cmp_rhs(s64), %cmp_lhs
+    ; LOWER: $w0 = COPY %cmp(s32)
+    ; LOWER: RET_ReallyLR implicit $w0
+    ; SELECT-LABEL: name: swap_and_lhs_0xFFFFFFFF
+    ; SELECT: liveins: $x0, $x1
+    ; SELECT: %cmp_rhs:gpr64sp = COPY $x1
+    ; SELECT: %and_lhs:gpr64all = COPY $x0
+    ; SELECT: [[COPY:%[0-9]+]]:gpr32all = COPY %and_lhs.sub_32
+    ; SELECT: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]]
+    ; SELECT: [[SUBSXrx:%[0-9]+]]:gpr64 = SUBSXrx %cmp_rhs, [[COPY1]], 16, implicit-def $nzcv
+    ; SELECT: %cmp:gpr32 = CSINCWr $wzr, $wzr, 12, implicit $nzcv
+    ; SELECT: $w0 = COPY %cmp
+    ; SELECT: RET_ReallyLR implicit $w0
+    %cmp_rhs:_(s64) = COPY $x1
+
+    %and_lhs:_(s64) = COPY $x0
+    %cst:_(s64) = G_CONSTANT i64 4294967295
+    %cmp_lhs:_(s64) = G_AND %and_lhs, %cst(s64)
+
+    %cmp:_(s32) = G_ICMP intpred(sge), %cmp_lhs(s64), %cmp_rhs
+    $w0 = COPY %cmp(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            dont_swap_and_lhs_wrong_mask
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $x0, $x1
+    ; 7 isn't an extend mask for G_AND, so there's no folding opportunities
+    ; here.
+    ;
+    ; LOWER-LABEL: name: dont_swap_and_lhs_wrong_mask
+    ; LOWER: liveins: $x0, $x1
+    ; LOWER: %cmp_rhs:_(s64) = COPY $x1
+    ; LOWER: %and_lhs:_(s64) = COPY $x0
+    ; LOWER: %not_an_extend_mask:_(s64) = G_CONSTANT i64 7
+    ; LOWER: %cmp_lhs:_(s64) = G_AND %and_lhs, %not_an_extend_mask
+    ; LOWER: %cmp:_(s32) = G_ICMP intpred(sge), %cmp_lhs(s64), %cmp_rhs
+    ; LOWER: $w0 = COPY %cmp(s32)
+    ; LOWER: RET_ReallyLR implicit $w0
+    ; SELECT-LABEL: name: dont_swap_and_lhs_wrong_mask
+    ; SELECT: liveins: $x0, $x1
+    ; SELECT: %cmp_rhs:gpr64 = COPY $x1
+    ; SELECT: %and_lhs:gpr64 = COPY $x0
+    ; SELECT: %cmp_lhs:gpr64common = ANDXri %and_lhs, 4098
+    ; SELECT: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr %cmp_lhs, %cmp_rhs, implicit-def $nzcv
+    ; SELECT: %cmp:gpr32 = CSINCWr $wzr, $wzr, 11, implicit $nzcv
+    ; SELECT: $w0 = COPY %cmp
+    ; SELECT: RET_ReallyLR implicit $w0
+    %cmp_rhs:_(s64) = COPY $x1
+
+    %and_lhs:_(s64) = COPY $x0
+    %not_an_extend_mask:_(s64) = G_CONSTANT i64 7
+    %cmp_lhs:_(s64) = G_AND %and_lhs, %not_an_extend_mask(s64)
+
+    %cmp:_(s32) = G_ICMP intpred(sge), %cmp_lhs(s64), %cmp_rhs
+    $w0 = COPY %cmp(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            swap_shl_lhs
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $x0, $x1
+
+    ; LOWER-LABEL: name: swap_shl_lhs
+    ; LOWER: liveins: $x0, $x1
+    ; LOWER: %cmp_rhs:_(s64) = COPY $x1
+    ; LOWER: %shl_lhs:_(s64) = COPY $x0
+    ; LOWER: %cst:_(s64) = G_CONSTANT i64 1
+    ; LOWER: %cmp_lhs:_(s64) = G_SHL %shl_lhs, %cst(s64)
+    ; LOWER: %cmp:_(s32) = G_ICMP intpred(sgt), %cmp_rhs(s64), %cmp_lhs
+    ; LOWER: $w0 = COPY %cmp(s32)
+    ; LOWER: RET_ReallyLR implicit $w0
+    ; SELECT-LABEL: name: swap_shl_lhs
+    ; SELECT: liveins: $x0, $x1
+    ; SELECT: %cmp_rhs:gpr64 = COPY $x1
+    ; SELECT: %shl_lhs:gpr64 = COPY $x0
+    ; SELECT: [[SUBSXrs:%[0-9]+]]:gpr64 = SUBSXrs %cmp_rhs, %shl_lhs, 1, implicit-def $nzcv
+    ; SELECT: %cmp:gpr32 = CSINCWr $wzr, $wzr, 13, implicit $nzcv
+    ; SELECT: $w0 = COPY %cmp
+    ; SELECT: RET_ReallyLR implicit $w0
+    %cmp_rhs:_(s64) = COPY $x1
+
+    %shl_lhs:_(s64) = COPY $x0
+    %cst:_(s64) = G_CONSTANT i64 1
+    %cmp_lhs:_(s64) = G_SHL %shl_lhs, %cst(s64)
+
+    %cmp:_(s32) = G_ICMP intpred(slt), %cmp_lhs(s64), %cmp_rhs
+    $w0 = COPY %cmp(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            swap_ashr_lhs
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $x0, $x1
+
+    ; LOWER-LABEL: name: swap_ashr_lhs
+    ; LOWER: liveins: $x0, $x1
+    ; LOWER: %cmp_rhs:_(s64) = COPY $x1
+    ; LOWER: %ashr_lhs:_(s64) = COPY $x0
+    ; LOWER: %cst:_(s64) = G_CONSTANT i64 1
+    ; LOWER: %cmp_lhs:_(s64) = G_ASHR %ashr_lhs, %cst(s64)
+    ; LOWER: %cmp:_(s32) = G_ICMP intpred(sgt), %cmp_rhs(s64), %cmp_lhs
+    ; LOWER: $w0 = COPY %cmp(s32)
+    ; LOWER: RET_ReallyLR implicit $w0
+    ; SELECT-LABEL: name: swap_ashr_lhs
+    ; SELECT: liveins: $x0, $x1
+    ; SELECT: %cmp_rhs:gpr64 = COPY $x1
+    ; SELECT: %ashr_lhs:gpr64 = COPY $x0
+    ; SELECT: [[SUBSXrs:%[0-9]+]]:gpr64 = SUBSXrs %cmp_rhs, %ashr_lhs, 129, implicit-def $nzcv
+    ; SELECT: %cmp:gpr32 = CSINCWr $wzr, $wzr, 13, implicit $nzcv
+    ; SELECT: $w0 = COPY %cmp
+    ; SELECT: RET_ReallyLR implicit $w0
+    %cmp_rhs:_(s64) = COPY $x1
+
+    %ashr_lhs:_(s64) = COPY $x0
+    %cst:_(s64) = G_CONSTANT i64 1
+    %cmp_lhs:_(s64) = G_ASHR %ashr_lhs, %cst(s64)
+
+    %cmp:_(s32) = G_ICMP intpred(slt), %cmp_lhs(s64), %cmp_rhs
+    $w0 = COPY %cmp(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            swap_lshr_lhs
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $x0, $x1
+
+    ; LOWER-LABEL: name: swap_lshr_lhs
+    ; LOWER: liveins: $x0, $x1
+    ; LOWER: %cmp_rhs:_(s64) = COPY $x1
+    ; LOWER: %lshr_lhs:_(s64) = COPY $x0
+    ; LOWER: %cst:_(s64) = G_CONSTANT i64 1
+    ; LOWER: %cmp_lhs:_(s64) = G_LSHR %lshr_lhs, %cst(s64)
+    ; LOWER: %cmp:_(s32) = G_ICMP intpred(sgt), %cmp_rhs(s64), %cmp_lhs
+    ; LOWER: $w0 = COPY %cmp(s32)
+    ; LOWER: RET_ReallyLR implicit $w0
+    ; SELECT-LABEL: name: swap_lshr_lhs
+    ; SELECT: liveins: $x0, $x1
+    ; SELECT: %cmp_rhs:gpr64 = COPY $x1
+    ; SELECT: %lshr_lhs:gpr64 = COPY $x0
+    ; SELECT: [[SUBSXrs:%[0-9]+]]:gpr64 = SUBSXrs %cmp_rhs, %lshr_lhs, 65, implicit-def $nzcv
+    ; SELECT: %cmp:gpr32 = CSINCWr $wzr, $wzr, 13, implicit $nzcv
+    ; SELECT: $w0 = COPY %cmp
+    ; SELECT: RET_ReallyLR implicit $w0
+    %cmp_rhs:_(s64) = COPY $x1
+
+    %lshr_lhs:_(s64) = COPY $x0
+    %cst:_(s64) = G_CONSTANT i64 1
+    %cmp_lhs:_(s64) = G_LSHR %lshr_lhs, %cst(s64)
+
+    %cmp:_(s32) = G_ICMP intpred(slt), %cmp_lhs(s64), %cmp_rhs
+    $w0 = COPY %cmp(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            dont_swap_shift_s64_cst_too_large
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $x0, $x1
+
+    ; Constant for the shift must be <= 63.
+
+    ; LOWER-LABEL: name: dont_swap_shift_s64_cst_too_large
+    ; LOWER: liveins: $x0, $x1
+    ; LOWER: %cmp_rhs:_(s64) = COPY $x1
+    ; LOWER: %shl_lhs:_(s64) = COPY $x0
+    ; LOWER: %too_large:_(s64) = G_CONSTANT i64 64
+    ; LOWER: %cmp_lhs:_(s64) = G_SHL %shl_lhs, %too_large(s64)
+    ; LOWER: %cmp:_(s32) = G_ICMP intpred(slt), %cmp_lhs(s64), %cmp_rhs
+    ; LOWER: $w0 = COPY %cmp(s32)
+    ; LOWER: RET_ReallyLR implicit $w0
+    ; SELECT-LABEL: name: dont_swap_shift_s64_cst_too_large
+    ; SELECT: liveins: $x0, $x1
+    ; SELECT: %cmp_rhs:gpr64 = COPY $x1
+    ; SELECT: %shl_lhs:gpr64 = COPY $x0
+    ; SELECT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 64
+    ; SELECT: %too_large:gpr64 = SUBREG_TO_REG 0, [[MOVi32imm]], %subreg.sub_32
+    ; SELECT: %cmp_lhs:gpr64 = LSLVXr %shl_lhs, %too_large
+    ; SELECT: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr %cmp_lhs, %cmp_rhs, implicit-def $nzcv
+    ; SELECT: %cmp:gpr32 = CSINCWr $wzr, $wzr, 10, implicit $nzcv
+    ; SELECT: $w0 = COPY %cmp
+    ; SELECT: RET_ReallyLR implicit $w0
+    %cmp_rhs:_(s64) = COPY $x1
+
+    %shl_lhs:_(s64) = COPY $x0
+    %too_large:_(s64) = G_CONSTANT i64 64
+    %cmp_lhs:_(s64) = G_SHL %shl_lhs, %too_large(s64)
+
+    %cmp:_(s32) = G_ICMP intpred(slt), %cmp_lhs(s64), %cmp_rhs
+    $w0 = COPY %cmp(s32)
+    RET_ReallyLR implicit $w0
+
+
+...
+---
+name:            dont_swap_shift_s32_cst_too_large
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $w0, $w1
+
+    ; Constant for the shift must be <= 31.
+
+    ; LOWER-LABEL: name: dont_swap_shift_s32_cst_too_large
+    ; LOWER: liveins: $w0, $w1
+    ; LOWER: %cmp_rhs:_(s32) = COPY $w1
+    ; LOWER: %shl_lhs:_(s32) = COPY $w0
+    ; LOWER: %cst:_(s32) = G_CONSTANT i32 32
+    ; LOWER: %cmp_lhs:_(s32) = G_SHL %shl_lhs, %cst(s32)
+    ; LOWER: %cmp:_(s32) = G_ICMP intpred(slt), %cmp_lhs(s32), %cmp_rhs
+    ; LOWER: $w0 = COPY %cmp(s32)
+    ; LOWER: RET_ReallyLR implicit $w0
+    ; SELECT-LABEL: name: dont_swap_shift_s32_cst_too_large
+    ; SELECT: liveins: $w0, $w1
+    ; SELECT: %cmp_rhs:gpr32 = COPY $w1
+    ; SELECT: %shl_lhs:gpr32 = COPY $w0
+    ; SELECT: %cst:gpr32 = MOVi32imm 32
+    ; SELECT: %cmp_lhs:gpr32 = LSLVWr %shl_lhs, %cst
+    ; SELECT: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr %cmp_lhs, %cmp_rhs, implicit-def $nzcv
+    ; SELECT: %cmp:gpr32 = CSINCWr $wzr, $wzr, 10, implicit $nzcv
+    ; SELECT: $w0 = COPY %cmp
+    ; SELECT: RET_ReallyLR implicit $w0
+    %cmp_rhs:_(s32) = COPY $w1
+
+    %shl_lhs:_(s32) = COPY $w0
+    %cst:_(s32) = G_CONSTANT i32 32
+    %cmp_lhs:_(s32) = G_SHL %shl_lhs, %cst(s32)
+
+    %cmp:_(s32) = G_ICMP intpred(slt), %cmp_lhs(s32), %cmp_rhs
+    $w0 = COPY %cmp(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            dont_swap_cmn_lhs_no_folding_opportunities
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x0, $x1
+
+    ; No reason to swap a CMN on the LHS when it won't introduce a constant
+    ; folding opportunity. We can recognise CMNs on the LHS and RHS, so there's
+    ; nothing to gain here.
+
+    ; LOWER-LABEL: name: dont_swap_cmn_lhs_no_folding_opportunities
+    ; LOWER: liveins: $x0, $x1
+    ; LOWER: %cmp_rhs:_(s64) = COPY $x1
+    ; LOWER: %sub_rhs:_(s64) = COPY $x0
+    ; LOWER: %zero:_(s64) = G_CONSTANT i64 0
+    ; LOWER: %cmp_lhs:_(s64) = G_SUB %zero, %sub_rhs
+    ; LOWER: %cmp:_(s32) = G_ICMP intpred(ne), %cmp_lhs(s64), %cmp_rhs
+    ; LOWER: $w0 = COPY %cmp(s32)
+    ; LOWER: RET_ReallyLR implicit $w0
+    ; SELECT-LABEL: name: dont_swap_cmn_lhs_no_folding_opportunities
+    ; SELECT: liveins: $x0, $x1
+    ; SELECT: %cmp_rhs:gpr64 = COPY $x1
+    ; SELECT: %sub_rhs:gpr64 = COPY $x0
+    ; SELECT: [[ADDSXrr:%[0-9]+]]:gpr64 = ADDSXrr %sub_rhs, %cmp_rhs, implicit-def $nzcv
+    ; SELECT: %cmp:gpr32 = CSINCWr $wzr, $wzr, 0, implicit $nzcv
+    ; SELECT: $w0 = COPY %cmp
+    ; SELECT: RET_ReallyLR implicit $w0
+    %cmp_rhs:_(s64) = COPY $x1
+
+    %sub_rhs:_(s64) = COPY $x0
+    %zero:_(s64) = G_CONSTANT i64 0
+    %cmp_lhs:_(s64) = G_SUB %zero, %sub_rhs
+
+    %cmp:_(s32) = G_ICMP intpred(ne), %cmp_lhs(s64), %cmp_rhs
+    $w0 = COPY %cmp(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            swap_cmn_lhs
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x0, $x1
+
+    ; Swap when we can see a constant folding opportunity through the sub on
+    ; the LHS.
+
+
+    ; LOWER-LABEL: name: swap_cmn_lhs
+    ; LOWER: liveins: $x0, $x1
+    ; LOWER: %cmp_rhs:_(s64) = COPY $x1
+    ; LOWER: %shl_lhs:_(s64) = COPY $x0
+    ; LOWER: %zero:_(s64) = G_CONSTANT i64 0
+    ; LOWER: %cst:_(s64) = G_CONSTANT i64 63
+    ; LOWER: %sub_rhs:_(s64) = G_SHL %shl_lhs, %cst(s64)
+    ; LOWER: %cmp_lhs:_(s64) = G_SUB %zero, %sub_rhs
+    ; LOWER: %cmp:_(s32) = G_ICMP intpred(ne), %cmp_rhs(s64), %cmp_lhs
+    ; LOWER: $w0 = COPY %cmp(s32)
+    ; LOWER: RET_ReallyLR implicit $w0
+    ; SELECT-LABEL: name: swap_cmn_lhs
+    ; SELECT: liveins: $x0, $x1
+    ; SELECT: %cmp_rhs:gpr64 = COPY $x1
+    ; SELECT: %shl_lhs:gpr64 = COPY $x0
+    ; SELECT: [[ADDSXrs:%[0-9]+]]:gpr64 = ADDSXrs %cmp_rhs, %shl_lhs, 63, implicit-def $nzcv
+    ; SELECT: %cmp:gpr32 = CSINCWr $wzr, $wzr, 0, implicit $nzcv
+    ; SELECT: $w0 = COPY %cmp
+    ; SELECT: RET_ReallyLR implicit $w0
+    %cmp_rhs:_(s64) = COPY $x1
+
+    %shl_lhs:_(s64) = COPY $x0
+    %zero:_(s64) = G_CONSTANT i64 0
+    %cst:_(s64) = G_CONSTANT i64 63
+    %sub_rhs:_(s64) = G_SHL %shl_lhs, %cst(s64)
+    %cmp_lhs:_(s64) = G_SUB %zero, %sub_rhs
+
+    %cmp:_(s32) = G_ICMP intpred(ne), %cmp_lhs(s64), %cmp_rhs
+    $w0 = COPY %cmp(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            dont_swap_cmn_lhs_when_rhs_more_profitable
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x0, $x1
+
+    ; Don't swap when the RHS's subtract offers a better constant folding
+    ; opportunity than the LHS's subtract.
+    ;
+    ; In this case, the RHS has a supported extend, plus a shift with a constant
+    ; <= 4.
+
+    ; LOWER-LABEL: name: dont_swap_cmn_lhs_when_rhs_more_profitable
+    ; LOWER: liveins: $x0, $x1
+    ; LOWER: %zero:_(s64) = G_CONSTANT i64 0
+    ; LOWER: %reg0:_(s64) = COPY $x0
+    ; LOWER: %shl_cst:_(s64) = G_CONSTANT i64 63
+    ; LOWER: %shl:_(s64) = G_SHL %reg0, %shl_cst(s64)
+    ; LOWER: %cmp_lhs:_(s64) = G_SUB %zero, %shl
+    ; LOWER: %reg1:_(s64) = COPY $x1
+    ; LOWER: %sext_in_reg:_(s64) = G_SEXT_INREG %reg1, 1
+    ; LOWER: %ashr_cst:_(s64) = G_CONSTANT i64 3
+    ; LOWER: %ashr:_(s64) = G_ASHR %sext_in_reg, %ashr_cst(s64)
+    ; LOWER: %cmp_rhs:_(s64) = G_SUB %zero, %ashr
+    ; LOWER: %cmp:_(s32) = G_ICMP intpred(eq), %cmp_lhs(s64), %cmp_rhs
+    ; LOWER: $w0 = COPY %cmp(s32)
+    ; LOWER: RET_ReallyLR implicit $w0
+    ; SELECT-LABEL: name: dont_swap_cmn_lhs_when_rhs_more_profitable
+    ; SELECT: liveins: $x0, $x1
+    ; SELECT: %reg0:gpr64 = COPY $x0
+    ; SELECT: %shl:gpr64 = UBFMXri %reg0, 1, 0
+    ; SELECT: %zero:gpr64 = COPY $xzr
+    ; SELECT: %reg1:gpr64 = COPY $x1
+    ; SELECT: %sext_in_reg:gpr64 = SBFMXri %reg1, 0, 0
+    ; SELECT: %cmp_rhs:gpr64 = SUBSXrs %zero, %sext_in_reg, 131, implicit-def $nzcv
+    ; SELECT: [[ADDSXrr:%[0-9]+]]:gpr64 = ADDSXrr %shl, %cmp_rhs, implicit-def $nzcv
+    ; SELECT: %cmp:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
+    ; SELECT: $w0 = COPY %cmp
+    ; SELECT: RET_ReallyLR implicit $w0
+    %zero:_(s64) = G_CONSTANT i64 0
+
+    %reg0:_(s64) = COPY $x0
+    %shl_cst:_(s64) = G_CONSTANT i64 63
+    %shl:_(s64) = G_SHL %reg0, %shl_cst(s64)
+    %cmp_lhs:_(s64) = G_SUB %zero, %shl
+
+    %reg1:_(s64) = COPY $x1
+    %sext_in_reg:_(s64) = G_SEXT_INREG %reg1, 1
+    %ashr_cst:_(s64) = G_CONSTANT i64 3
+    %ashr:_(s64) = G_ASHR %sext_in_reg, %ashr_cst(s64)
+    %cmp_rhs:_(s64) = G_SUB %zero, %ashr
+
+    %cmp:_(s32) = G_ICMP intpred(eq), %cmp_lhs(s64), %cmp_rhs
+    $w0 = COPY %cmp(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            dont_swap_rhs_with_supported_extend
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $x0, $x1
+    ; The RHS offers more constant folding opportunities than the LHS.
+
+    ; LOWER-LABEL: name: dont_swap_rhs_with_supported_extend
+    ; LOWER: liveins: $x0, $x1
+    ; LOWER: %reg0:_(s64) = COPY $x0
+    ; LOWER: %lhs_cst:_(s64) = G_CONSTANT i64 1
+    ; LOWER: %cmp_lhs:_(s64) = G_SHL %reg0, %lhs_cst(s64)
+    ; LOWER: %reg1:_(s64) = COPY $x1
+    ; LOWER: %and_mask:_(s64) = G_CONSTANT i64 255
+    ; LOWER: %and:_(s64) = G_AND %reg1, %and_mask
+    ; LOWER: %rhs_cst:_(s64) = G_CONSTANT i64 1
+    ; LOWER: %cmp_rhs:_(s64) = G_ASHR %and, %rhs_cst(s64)
+    ; LOWER: %cmp:_(s32) = G_ICMP intpred(slt), %cmp_lhs(s64), %cmp_rhs
+    ; LOWER: $w0 = COPY %cmp(s32)
+    ; LOWER: RET_ReallyLR implicit $w0
+    ; SELECT-LABEL: name: dont_swap_rhs_with_supported_extend
+    ; SELECT: liveins: $x0, $x1
+    ; SELECT: %reg0:gpr64 = COPY $x0
+    ; SELECT: %cmp_lhs:gpr64 = UBFMXri %reg0, 63, 62
+    ; SELECT: %reg1:gpr64 = COPY $x1
+    ; SELECT: %and:gpr64common = ANDXri %reg1, 4103
+    ; SELECT: [[SUBSXrs:%[0-9]+]]:gpr64 = SUBSXrs %cmp_lhs, %and, 129, implicit-def $nzcv
+    ; SELECT: %cmp:gpr32 = CSINCWr $wzr, $wzr, 10, implicit $nzcv
+    ; SELECT: $w0 = COPY %cmp
+    ; SELECT: RET_ReallyLR implicit $w0
+    %reg0:_(s64) = COPY $x0
+    %lhs_cst:_(s64) = G_CONSTANT i64 1
+    %cmp_lhs:_(s64) = G_SHL %reg0, %lhs_cst(s64)
+
+    %reg1:_(s64) = COPY $x1
+    %and_mask:_(s64) = G_CONSTANT i64 255
+    %and:_(s64) = G_AND %reg1, %and_mask(s64)
+    %rhs_cst:_(s64) = G_CONSTANT i64 1
+    %cmp_rhs:_(s64) = G_ASHR %and, %rhs_cst(s64)
+
+    %cmp:_(s32) = G_ICMP intpred(slt), %cmp_lhs(s64), %cmp_rhs
+    $w0 = COPY %cmp(s32)
+    RET_ReallyLR implicit $w0
+
+
+...
+---
+name:            swap_rhs_with_supported_extend
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $x0, $x1
+
+    ; In this case, both the LHS and RHS are fed by a supported extend. However,
+    ; the LHS' shift has a constant <= 4. This makes it more profitable, so
+    ; we should swap the operands.
+
+    ; LOWER-LABEL: name: swap_rhs_with_supported_extend
+    ; LOWER: liveins: $x0, $x1
+    ; LOWER: %reg0:_(s64) = COPY $x0
+    ; LOWER: %and_mask:_(s64) = G_CONSTANT i64 255
+    ; LOWER: %and:_(s64) = G_AND %reg0, %and_mask
+    ; LOWER: %lhs_cst:_(s64) = G_CONSTANT i64 1
+    ; LOWER: %cmp_lhs:_(s64) = G_SHL %and, %lhs_cst(s64)
+    ; LOWER: %rhs_cst:_(s64) = G_CONSTANT i64 5
+    ; LOWER: %cmp_rhs:_(s64) = G_ASHR %and, %rhs_cst(s64)
+    ; LOWER: %cmp:_(s32) = G_ICMP intpred(slt), %cmp_rhs(s64), %cmp_lhs
+    ; LOWER: $w0 = COPY %cmp(s32)
+    ; LOWER: RET_ReallyLR implicit $w0
+    ; SELECT-LABEL: name: swap_rhs_with_supported_extend
+    ; SELECT: liveins: $x0, $x1
+    ; SELECT: %reg0:gpr64 = COPY $x0
+    ; SELECT: %and:gpr64common = ANDXri %reg0, 4103
+    ; SELECT: [[COPY:%[0-9]+]]:gpr32all = COPY %reg0.sub_32
+    ; SELECT: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]]
+    ; SELECT: %cmp_rhs:gpr64common = SBFMXri %and, 5, 63
+    ; SELECT: [[SUBSXrx:%[0-9]+]]:gpr64 = SUBSXrx %cmp_rhs, [[COPY1]], 1, implicit-def $nzcv
+    ; SELECT: %cmp:gpr32 = CSINCWr $wzr, $wzr, 10, implicit $nzcv
+    ; SELECT: $w0 = COPY %cmp
+    ; SELECT: RET_ReallyLR implicit $w0
+    %reg0:_(s64) = COPY $x0
+    %and_mask:_(s64) = G_CONSTANT i64 255
+    %and:_(s64) = G_AND %reg0, %and_mask(s64)
+
+    %lhs_cst:_(s64) = G_CONSTANT i64 1
+    %cmp_lhs:_(s64) = G_SHL %and, %lhs_cst(s64)
+
+    %rhs_cst:_(s64) = G_CONSTANT i64 5
+    %cmp_rhs:_(s64) = G_ASHR %and, %rhs_cst(s64)
+
+    %cmp:_(s32) = G_ICMP intpred(sgt), %cmp_lhs(s64), %cmp_rhs
+    $w0 = COPY %cmp(s32)
+    RET_ReallyLR implicit $w0


        

