[llvm] [GlobalIsel] Combine G_SELECT (PR #74116)

Thorsten Schütt via llvm-commits llvm-commits at lists.llvm.org
Sun Dec 3 01:48:14 PST 2023


https://github.com/tschuett updated https://github.com/llvm/llvm-project/pull/74116

>From ea508bf43e25b0e74a801c06872dff11d2505a78 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= <schuett at gmail.com>
Date: Fri, 1 Dec 2023 18:31:41 +0100
Subject: [PATCH 1/2] [GlobalIsel] Combine G_SELECT

At least AArch64 and X86 use flag-setting instructions for the
implementation. Inspired by visitSELECT, as the first step, this
implements combines based on conditions def'd by logical binary
operations aka chaining.

On Neoverse V2, sub and flagset subs have different costs and
pipelines.

test plan: ninja check-llvm-codegen
---
 .../llvm/CodeGen/GlobalISel/CombinerHelper.h  |   7 +
 .../CodeGen/GlobalISel/GenericMachineInstrs.h |  41 ++
 .../include/llvm/Target/GlobalISel/Combine.td |   9 +-
 .../lib/CodeGen/GlobalISel/CombinerHelper.cpp |  75 +++
 .../AArch64/GlobalISel/combine-select.mir     |  95 +++
 llvm/test/CodeGen/AArch64/arm64-ccmp.ll       | 615 ++++++++++++------
 .../CodeGen/AArch64/dag-combine-select.ll     |  44 +-
 .../AMDGPU/GlobalISel/bool-legalization.ll    |  14 +-
 8 files changed, 649 insertions(+), 251 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index ba72a3b71ffd7..e938e2267fdd6 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -812,6 +812,9 @@ class CombinerHelper {
   // Given a binop \p MI, commute operands 1 and 2.
   void applyCommuteBinOpOperands(MachineInstr &MI);
 
+  /// Combine G_SELECTs; on a match, \p MatchInfo builds the replacement.
+  bool matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo);
+
 private:
   /// Checks for legality of an indexed variant of \p LdSt.
   bool isIndexedLoadStoreLegal(GLoadStore &LdSt) const;
@@ -902,6 +905,10 @@ class CombinerHelper {
   /// select (fcmp uge x, 1.0) 1.0, x -> fminnm x, 1.0
   bool matchFPSelectToMinMax(Register Dst, Register Cond, Register TrueVal,
                              Register FalseVal, BuildFnTy &MatchInfo);
+
+  /// Try to combine selects where the condition is def'd by logical binary
+  /// operators.
+  bool tryCombineSelectLogical(GSelect *Select, BuildFnTy &MatchInfo);
 };
 } // namespace llvm
 
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
index 6ab1d4550c51c..9641ad6bfd919 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
@@ -558,6 +558,47 @@ class GVecReduce : public GenericMachineInstr {
   }
 };
 
+/// Represents a logical binary operator.
+class GLogicalBinOp : public GenericMachineInstr {
+public:
+  Register getLHSReg() const { return getOperand(1).getReg(); }
+  Register getRHSReg() const { return getOperand(2).getReg(); }
+
+  static bool classof(const MachineInstr *MI) {
+    switch (MI->getOpcode()) {
+    case TargetOpcode::G_AND:
+    case TargetOpcode::G_OR:
+    case TargetOpcode::G_XOR:
+      return true;
+    default:
+      return false;
+    };
+  }
+};
+
+/// Represents a G_AND.
+class GAnd : public GLogicalBinOp {
+public:
+  static bool classof(const MachineInstr *MI) {
+    return MI->getOpcode() == TargetOpcode::G_AND;
+  }
+};
+
+/// Represents a G_OR.
+class GOr : public GLogicalBinOp {
+public:
+  static bool classof(const MachineInstr *MI) {
+    return MI->getOpcode() == TargetOpcode::G_OR;
+  }
+};
+
+/// Represents a G_XOR.
+class GXor : public GLogicalBinOp {
+public:
+  static bool classof(const MachineInstr *MI) {
+    return MI->getOpcode() == TargetOpcode::G_XOR;
+  }
+};
 
 } // namespace llvm
 
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 9a84ab80157f3..7d27482c319d6 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1236,6 +1236,13 @@ def select_to_minmax: GICombineRule<
          [{ return Helper.matchSimplifySelectToMinMax(*${root}, ${info}); }]),
   (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>;
 
+def match_selects : GICombineRule<
+  (defs root:$root, build_fn_matchinfo:$matchinfo),
+  (match (wip_match_opcode G_SELECT):$root,
+        [{ return Helper.matchSelect(*${root}, ${matchinfo}); }]),
+  (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>;
+
+
 // FIXME: These should use the custom predicate feature once it lands.
 def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
                                      undef_to_negative_one,
@@ -1309,7 +1316,7 @@ def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines,
     intdiv_combines, mulh_combines, redundant_neg_operands,
     and_or_disjoint_mask, fma_combines, fold_binop_into_select,
     sub_add_reg, select_to_minmax, redundant_binop_in_equality,
-    fsub_to_fneg, commute_constant_to_rhs]>;
+    fsub_to_fneg, commute_constant_to_rhs, match_selects]>;
 
 // A combine group used to for prelegalizer combiners at -O0. The combines in
 // this group have been selected based on experiments to balance code size and
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index c2a7c2d011881..d83215dcc83b4 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -6224,3 +6224,78 @@ void CombinerHelper::applyCommuteBinOpOperands(MachineInstr &MI) {
   MI.getOperand(2).setReg(LHSReg);
   Observer.changedInstr(MI);
 }
+
+bool CombinerHelper::tryCombineSelectLogical(GSelect *Select,
+                                             BuildFnTy &MatchInfo) {
+  MachineInstr *CondDef = MRI.getVRegDef(Select->getCondReg());
+  LLT DstTy = MRI.getType(Select->getReg(0));
+  uint32_t Flags = Select->getFlags();
+  Register DstReg = Select->getReg(0);
+
+  if (isa<GLogicalBinOp>(CondDef)) {
+    // The chaining might look like a pessimization.
+    // In fact, it is an optimization. One instruction def'ing
+    // the condition is
+    // removed and the conditions of the selects get simplified.
+    // In the next iteration, the combiner has to do less work to
+    // further simplify them.
+    if (isa<GAnd>(CondDef)) {
+      GAnd *And = cast<GAnd>(CondDef);
+      // transform: select (and (Cond0, Cond1)), X, Y
+      // to:        select Cond0, (select Cond1, X, Y), Y
+      MatchInfo = [=](MachineIRBuilder &B) {
+        B.setInstrAndDebugLoc(*Select);
+        Register Inner = MRI.createGenericVirtualRegister(DstTy);
+        B.buildSelect(Inner, And->getRHSReg(), Select->getTrueReg(),
+                      Select->getFalseReg(), Flags);
+        B.buildSelect(DstReg, And->getLHSReg(), Inner, Select->getFalseReg(),
+                      Flags);
+      };
+      return true;
+    } else if (isa<GOr>(CondDef)) {
+      GOr *Or = cast<GOr>(CondDef);
+      // transform: select (or (Cond0, Cond1)), X, Y
+      // to:        select Cond0, X, (select Cond1, X, Y)
+      MatchInfo = [=](MachineIRBuilder &B) {
+        B.setInstrAndDebugLoc(*Select);
+        Register Inner = MRI.createGenericVirtualRegister(DstTy);
+        B.buildSelect(Inner, Or->getRHSReg(), Select->getTrueReg(),
+                      Select->getFalseReg(), Flags);
+        B.buildSelect(DstReg, Or->getLHSReg(), Select->getTrueReg(), Inner,
+                      Flags);
+      };
+      return true;
+    } else if (isa<GXor>(CondDef)) {
+      GXor *Xor = cast<GXor>(CondDef);
+      auto IConstant =
+          getIConstantVRegValWithLookThrough(Xor->getRHSReg(), MRI);
+      if (IConstant && IConstant->Value == 1) {
+        // transform: select (xor Cond0, 1), X, Y
+        // to:        select Cond0, Y, X
+        // a xor 1 reads like not a.
+        MatchInfo = [=](MachineIRBuilder &B) {
+          B.setInstrAndDebugLoc(*Select);
+          B.buildSelect(DstReg, Xor->getLHSReg(), Select->getFalseReg(),
+                        Select->getTrueReg(), Flags);
+        };
+        return true;
+      }
+    }
+  }
+
+  return false;
+}
+
+bool CombinerHelper::matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo) {
+  GSelect *Select = cast<GSelect>(&MI);
+
+  // FIXME: support vector conditions
+  if (MRI.getType(Select->getCondReg()).isVector())
+    return false;
+
+  // combine selects where the condition is def'd by logical binary operations.
+  if (tryCombineSelectLogical(Select, MatchInfo))
+    return true;
+
+  return false;
+}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir
index 81d38a5b08047..a60148ffe5716 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir
@@ -298,3 +298,98 @@ body:             |
     %ext:_(s32) = G_ANYEXT %sel
     $w0 = COPY %ext(s32)
 ...
+---
+name:            and_cond
+body:             |
+  bb.1:
+    liveins: $x0, $x1, $x2
+    ; CHECK-LABEL: name: and_cond
+    ; CHECK: liveins: $x0, $x1, $x2
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3
+    ; CHECK-NEXT: %x:_(s1) = G_TRUNC [[COPY]](s64)
+    ; CHECK-NEXT: %y:_(s1) = G_TRUNC [[COPY1]](s64)
+    ; CHECK-NEXT: %t:_(s1) = G_TRUNC [[COPY2]](s64)
+    ; CHECK-NEXT: %f:_(s1) = G_TRUNC [[COPY3]](s64)
+    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s1) = G_SELECT %y(s1), %t, %f
+    ; CHECK-NEXT: %sel:_(s1) = G_SELECT %x(s1), [[SELECT]], %f
+    ; CHECK-NEXT: %ext:_(s32) = G_ANYEXT %sel(s1)
+    ; CHECK-NEXT: $w0 = COPY %ext(s32)
+    %0:_(s64) = COPY $x0
+    %1:_(s64) = COPY $x1
+    %2:_(s64) = COPY $x2
+    %3:_(s64) = COPY $x3
+    %x:_(s1) = G_TRUNC %0
+    %y:_(s1) = G_TRUNC %1
+    %t:_(s1) = G_TRUNC %2
+    %f:_(s1) = G_TRUNC %3
+    %cond:_(s1) = G_AND %x, %y
+    %sel:_(s1) = G_SELECT %cond, %t, %f
+    %ext:_(s32) = G_ANYEXT %sel
+    $w0 = COPY %ext(s32)
+...
+---
+name:            or_cond
+body:             |
+  bb.1:
+    liveins: $x0, $x1, $x2
+    ; CHECK-LABEL: name: or_cond
+    ; CHECK: liveins: $x0, $x1, $x2
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3
+    ; CHECK-NEXT: %x:_(s1) = G_TRUNC [[COPY]](s64)
+    ; CHECK-NEXT: %y:_(s1) = G_TRUNC [[COPY1]](s64)
+    ; CHECK-NEXT: %t:_(s1) = G_TRUNC [[COPY2]](s64)
+    ; CHECK-NEXT: %f:_(s1) = G_TRUNC [[COPY3]](s64)
+    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s1) = G_SELECT %y(s1), %t, %f
+    ; CHECK-NEXT: %sel:_(s1) = G_SELECT %x(s1), %t, [[SELECT]]
+    ; CHECK-NEXT: %ext:_(s32) = G_ANYEXT %sel(s1)
+    ; CHECK-NEXT: $w0 = COPY %ext(s32)
+    %0:_(s64) = COPY $x0
+    %1:_(s64) = COPY $x1
+    %2:_(s64) = COPY $x2
+    %3:_(s64) = COPY $x3
+    %x:_(s1) = G_TRUNC %0
+    %y:_(s1) = G_TRUNC %1
+    %t:_(s1) = G_TRUNC %2
+    %f:_(s1) = G_TRUNC %3
+    %cond:_(s1) = G_OR %x, %y
+    %sel:_(s1) = G_SELECT %cond, %t, %f
+    %ext:_(s32) = G_ANYEXT %sel
+    $w0 = COPY %ext(s32)
+...
+---
+name:            xor_cond
+body:             |
+  bb.1:
+    liveins: $x0, $x1, $x2
+    ; CHECK-LABEL: name: xor_cond
+    ; CHECK: liveins: $x0, $x1, $x2
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
+    ; CHECK-NEXT: %x:_(s1) = G_TRUNC [[COPY]](s64)
+    ; CHECK-NEXT: %t:_(s1) = G_TRUNC [[COPY1]](s64)
+    ; CHECK-NEXT: %f:_(s1) = G_TRUNC [[COPY2]](s64)
+    ; CHECK-NEXT: %sel:_(s1) = G_SELECT %x(s1), %f, %t
+    ; CHECK-NEXT: %ext:_(s32) = G_ANYEXT %sel(s1)
+    ; CHECK-NEXT: $w0 = COPY %ext(s32)
+    %0:_(s64) = COPY $x0
+    %1:_(s64) = COPY $x1
+    %2:_(s64) = COPY $x2
+    %x:_(s1) = G_TRUNC %0
+    %t:_(s1) = G_TRUNC %1
+    %f:_(s1) = G_TRUNC %2
+    %one:_(s1) = G_CONSTANT i1 1
+    %cond:_(s1) = G_XOR %x, %one
+    %sel:_(s1) = G_SELECT %cond, %t, %f
+    %ext:_(s32) = G_ANYEXT %sel
+    $w0 = COPY %ext(s32)
+...
diff --git a/llvm/test/CodeGen/AArch64/arm64-ccmp.ll b/llvm/test/CodeGen/AArch64/arm64-ccmp.ll
index 821f6e403a271..7bf5ac22d0ae3 100644
--- a/llvm/test/CodeGen/AArch64/arm64-ccmp.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-ccmp.ll
@@ -14,7 +14,7 @@ define i32 @single_same(i32 %a, i32 %b) nounwind ssp {
 ; CHECK-NEXT:    bl _foo
 ; CHECK-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
 ; CHECK-NEXT:  LBB0_2: ; %if.end
-; CHECK-NEXT:    mov w0, #7
+; CHECK-NEXT:    mov w0, #7 ; =0x7
 ; CHECK-NEXT:    ret
 entry:
   %cmp = icmp eq i32 %a, 5
@@ -42,7 +42,7 @@ define i32 @single_different(i32 %a, i32 %b) nounwind ssp {
 ; SDISEL-NEXT:    bl _foo
 ; SDISEL-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
 ; SDISEL-NEXT:  LBB1_2: ; %if.end
-; SDISEL-NEXT:    mov w0, #7
+; SDISEL-NEXT:    mov w0, #7 ; =0x7
 ; SDISEL-NEXT:    ret
 ;
 ; GISEL-LABEL: single_different:
@@ -55,7 +55,7 @@ define i32 @single_different(i32 %a, i32 %b) nounwind ssp {
 ; GISEL-NEXT:    bl _foo
 ; GISEL-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
 ; GISEL-NEXT:  LBB1_2: ; %if.end
-; GISEL-NEXT:    mov w0, #7
+; GISEL-NEXT:    mov w0, #7 ; =0x7
 ; GISEL-NEXT:    ret
 entry:
   %cmp = icmp sle i32 %a, 5
@@ -88,7 +88,7 @@ define i32 @single_flagclobber(i32 %a, i32 %b) nounwind ssp {
 ; SDISEL-NEXT:    bl _foo
 ; SDISEL-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
 ; SDISEL-NEXT:  LBB2_3: ; %if.end
-; SDISEL-NEXT:    mov w0, #7
+; SDISEL-NEXT:    mov w0, #7 ; =0x7
 ; SDISEL-NEXT:    ret
 ;
 ; GISEL-LABEL: single_flagclobber:
@@ -106,7 +106,7 @@ define i32 @single_flagclobber(i32 %a, i32 %b) nounwind ssp {
 ; GISEL-NEXT:    bl _foo
 ; GISEL-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
 ; GISEL-NEXT:  LBB2_3: ; %if.end
-; GISEL-NEXT:    mov w0, #7
+; GISEL-NEXT:    mov w0, #7 ; =0x7
 ; GISEL-NEXT:    ret
 entry:
   %cmp = icmp eq i32 %a, 5
@@ -144,7 +144,7 @@ define i32 @single_flagclobber_tbz(i32 %a, i32 %b) nounwind ssp {
 ; CHECK-NEXT:    bl _foo
 ; CHECK-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
 ; CHECK-NEXT:  LBB3_3: ; %if.end
-; CHECK-NEXT:    mov w0, #7
+; CHECK-NEXT:    mov w0, #7 ; =0x7
 ; CHECK-NEXT:    ret
 entry:
   %cmp = icmp eq i32 %a, 5
@@ -178,13 +178,13 @@ define i32 @speculate_division(i32 %a, i32 %b) nounwind ssp {
 ; SDISEL-NEXT:    ccmp w8, #16, #0, ge
 ; SDISEL-NEXT:    b.le LBB4_2
 ; SDISEL-NEXT:  ; %bb.1: ; %if.end
-; SDISEL-NEXT:    mov w0, #7
+; SDISEL-NEXT:    mov w0, #7 ; =0x7
 ; SDISEL-NEXT:    ret
 ; SDISEL-NEXT:  LBB4_2: ; %if.then
 ; SDISEL-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
 ; SDISEL-NEXT:    bl _foo
 ; SDISEL-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
-; SDISEL-NEXT:    mov w0, #7
+; SDISEL-NEXT:    mov w0, #7 ; =0x7
 ; SDISEL-NEXT:    ret
 ;
 ; GISEL-LABEL: speculate_division:
@@ -194,13 +194,13 @@ define i32 @speculate_division(i32 %a, i32 %b) nounwind ssp {
 ; GISEL-NEXT:    ccmp w8, #17, #0, gt
 ; GISEL-NEXT:    b.lt LBB4_2
 ; GISEL-NEXT:  ; %bb.1: ; %if.end
-; GISEL-NEXT:    mov w0, #7
+; GISEL-NEXT:    mov w0, #7 ; =0x7
 ; GISEL-NEXT:    ret
 ; GISEL-NEXT:  LBB4_2: ; %if.then
 ; GISEL-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
 ; GISEL-NEXT:    bl _foo
 ; GISEL-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
-; GISEL-NEXT:    mov w0, #7
+; GISEL-NEXT:    mov w0, #7 ; =0x7
 ; GISEL-NEXT:    ret
 entry:
   %cmp = icmp sgt i32 %a, 0
@@ -230,13 +230,13 @@ define i32 @single_fcmp(i32 %a, float %b) nounwind ssp {
 ; SDISEL-NEXT:    fccmp s0, s1, #8, ge
 ; SDISEL-NEXT:    b.ge LBB5_2
 ; SDISEL-NEXT:  ; %bb.1: ; %if.end
-; SDISEL-NEXT:    mov w0, #7
+; SDISEL-NEXT:    mov w0, #7 ; =0x7
 ; SDISEL-NEXT:    ret
 ; SDISEL-NEXT:  LBB5_2: ; %if.then
 ; SDISEL-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
 ; SDISEL-NEXT:    bl _foo
 ; SDISEL-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
-; SDISEL-NEXT:    mov w0, #7
+; SDISEL-NEXT:    mov w0, #7 ; =0x7
 ; SDISEL-NEXT:    ret
 ;
 ; GISEL-LABEL: single_fcmp:
@@ -248,13 +248,13 @@ define i32 @single_fcmp(i32 %a, float %b) nounwind ssp {
 ; GISEL-NEXT:    fccmp s0, s1, #8, gt
 ; GISEL-NEXT:    b.ge LBB5_2
 ; GISEL-NEXT:  ; %bb.1: ; %if.end
-; GISEL-NEXT:    mov w0, #7
+; GISEL-NEXT:    mov w0, #7 ; =0x7
 ; GISEL-NEXT:    ret
 ; GISEL-NEXT:  LBB5_2: ; %if.then
 ; GISEL-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
 ; GISEL-NEXT:    bl _foo
 ; GISEL-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
-; GISEL-NEXT:    mov w0, #7
+; GISEL-NEXT:    mov w0, #7 ; =0x7
 ; GISEL-NEXT:    ret
 entry:
   %cmp = icmp sgt i32 %a, 0
@@ -318,7 +318,7 @@ define i32 @cbz_head(i32 %a, i32 %b) nounwind ssp {
 ; CHECK-NEXT:    bl _foo
 ; CHECK-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
 ; CHECK-NEXT:  LBB7_2: ; %if.end
-; CHECK-NEXT:    mov w0, #7
+; CHECK-NEXT:    mov w0, #7 ; =0x7
 ; CHECK-NEXT:    ret
 entry:
   %cmp = icmp eq i32 %a, 0
@@ -346,13 +346,13 @@ define i32 @immediate_range(i32 %a, i32 %b) nounwind ssp {
 ; CHECK-NEXT:    cmp w1, #32
 ; CHECK-NEXT:    b.eq LBB8_3
 ; CHECK-NEXT:  ; %bb.2: ; %if.end
-; CHECK-NEXT:    mov w0, #7
+; CHECK-NEXT:    mov w0, #7 ; =0x7
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  LBB8_3: ; %if.then
 ; CHECK-NEXT:    stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
 ; CHECK-NEXT:    bl _foo
 ; CHECK-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
-; CHECK-NEXT:    mov w0, #7
+; CHECK-NEXT:    mov w0, #7 ; =0x7
 ; CHECK-NEXT:    ret
 entry:
   %cmp = icmp eq i32 %a, 5
@@ -380,7 +380,7 @@ define i32 @cbz_second(i32 %a, i32 %b) nounwind ssp {
 ; CHECK-NEXT:    bl _foo
 ; CHECK-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
 ; CHECK-NEXT:  LBB9_2: ; %if.end
-; CHECK-NEXT:    mov w0, #7
+; CHECK-NEXT:    mov w0, #7 ; =0x7
 ; CHECK-NEXT:    ret
 entry:
   %cmp = icmp eq i32 %a, 0
@@ -408,7 +408,7 @@ define i32 @cbnz_second(i32 %a, i32 %b) nounwind ssp {
 ; CHECK-NEXT:    bl _foo
 ; CHECK-NEXT:    ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
 ; CHECK-NEXT:  LBB10_2: ; %if.end
-; CHECK-NEXT:    mov w0, #7
+; CHECK-NEXT:    mov w0, #7 ; =0x7
 ; CHECK-NEXT:    ret
 entry:
   %cmp = icmp eq i32 %a, 0
@@ -466,10 +466,11 @@ define i64 @select_and(i32 %w0, i32 %w1, i64 %x2, i64 %x3) {
 ;
 ; GISEL-LABEL: select_and:
 ; GISEL:       ; %bb.0:
-; GISEL-NEXT:    mov w8, #5
+; GISEL-NEXT:    mov w8, #5 ; =0x5
 ; GISEL-NEXT:    cmp w8, w1
-; GISEL-NEXT:    ccmp w0, w1, #0, ne
-; GISEL-NEXT:    csel x0, x2, x3, lt
+; GISEL-NEXT:    csel x8, x2, x3, ne
+; GISEL-NEXT:    cmp w0, w1
+; GISEL-NEXT:    csel x0, x8, x3, lt
 ; GISEL-NEXT:    ret
   %1 = icmp slt i32 %w0, %w1
   %2 = icmp ne i32 5, %w1
@@ -488,10 +489,11 @@ define i64 @select_or(i32 %w0, i32 %w1, i64 %x2, i64 %x3) {
 ;
 ; GISEL-LABEL: select_or:
 ; GISEL:       ; %bb.0:
-; GISEL-NEXT:    mov w8, #5
+; GISEL-NEXT:    mov w8, #5 ; =0x5
 ; GISEL-NEXT:    cmp w8, w1
-; GISEL-NEXT:    ccmp w0, w1, #8, eq
-; GISEL-NEXT:    csel x0, x2, x3, lt
+; GISEL-NEXT:    csel x8, x2, x3, ne
+; GISEL-NEXT:    cmp w0, w1
+; GISEL-NEXT:    csel x0, x2, x8, lt
 ; GISEL-NEXT:    ret
   %1 = icmp slt i32 %w0, %w1
   %2 = icmp ne i32 5, %w1
@@ -510,9 +512,10 @@ define float @select_or_float(i32 %w0, i32 %w1, float %x2, float %x3) {
 ;
 ; GISEL-LABEL: select_or_float:
 ; GISEL:       ; %bb.0:
-; GISEL-NEXT:    mov w8, #5
+; GISEL-NEXT:    mov w8, #5 ; =0x5
 ; GISEL-NEXT:    cmp w8, w1
-; GISEL-NEXT:    ccmp w0, w1, #8, eq
+; GISEL-NEXT:    fcsel s1, s0, s1, ne
+; GISEL-NEXT:    cmp w0, w1
 ; GISEL-NEXT:    fcsel s0, s0, s1, lt
 ; GISEL-NEXT:    ret
   %1 = icmp slt i32 %w0, %w1
@@ -528,17 +531,19 @@ define i64 @gccbug(i64 %x0, i64 %x1) {
 ; SDISEL-NEXT:    cmp x0, #2
 ; SDISEL-NEXT:    ccmp x0, #4, #4, ne
 ; SDISEL-NEXT:    ccmp x1, #0, #0, eq
-; SDISEL-NEXT:    mov w8, #1
+; SDISEL-NEXT:    mov w8, #1 ; =0x1
 ; SDISEL-NEXT:    cinc x0, x8, eq
 ; SDISEL-NEXT:    ret
 ;
 ; GISEL-LABEL: gccbug:
 ; GISEL:       ; %bb.0:
-; GISEL-NEXT:    mov w8, #2
+; GISEL-NEXT:    mov w8, #2 ; =0x2
+; GISEL-NEXT:    cmp x1, #0
+; GISEL-NEXT:    csinc x8, x8, xzr, eq
 ; GISEL-NEXT:    cmp x0, #2
-; GISEL-NEXT:    ccmp x0, #4, #4, ne
-; GISEL-NEXT:    ccmp x1, #0, #0, eq
-; GISEL-NEXT:    csinc x0, x8, xzr, eq
+; GISEL-NEXT:    csinc x9, x8, xzr, eq
+; GISEL-NEXT:    cmp x0, #4
+; GISEL-NEXT:    csel x0, x8, x9, eq
 ; GISEL-NEXT:    ret
   %cmp0 = icmp eq i64 %x1, 0
   %cmp1 = icmp eq i64 %x0, 2
@@ -552,14 +557,26 @@ define i64 @gccbug(i64 %x0, i64 %x1) {
 }
 
 define i32 @select_ororand(i32 %w0, i32 %w1, i32 %w2, i32 %w3) {
-; CHECK-LABEL: select_ororand:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    cmp w3, #4
-; CHECK-NEXT:    ccmp w2, #2, #0, gt
-; CHECK-NEXT:    ccmp w1, #13, #2, ge
-; CHECK-NEXT:    ccmp w0, #0, #4, ls
-; CHECK-NEXT:    csel w0, w3, wzr, eq
-; CHECK-NEXT:    ret
+; SDISEL-LABEL: select_ororand:
+; SDISEL:       ; %bb.0:
+; SDISEL-NEXT:    cmp w3, #4
+; SDISEL-NEXT:    ccmp w2, #2, #0, gt
+; SDISEL-NEXT:    ccmp w1, #13, #2, ge
+; SDISEL-NEXT:    ccmp w0, #0, #4, ls
+; SDISEL-NEXT:    csel w0, w3, wzr, eq
+; SDISEL-NEXT:    ret
+;
+; GISEL-LABEL: select_ororand:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    cmp w3, #4
+; GISEL-NEXT:    csel w8, w3, wzr, gt
+; GISEL-NEXT:    cmp w2, #2
+; GISEL-NEXT:    csel w8, w8, wzr, lt
+; GISEL-NEXT:    cmp w1, #13
+; GISEL-NEXT:    csel w8, w3, w8, hi
+; GISEL-NEXT:    cmp w0, #0
+; GISEL-NEXT:    csel w0, w3, w8, eq
+; GISEL-NEXT:    ret
   %c0 = icmp eq i32 %w0, 0
   %c1 = icmp ugt i32 %w1, 13
   %c2 = icmp slt i32 %w2, 2
@@ -572,13 +589,23 @@ define i32 @select_ororand(i32 %w0, i32 %w1, i32 %w2, i32 %w3) {
 }
 
 define i32 @select_andor(i32 %v1, i32 %v2, i32 %v3) {
-; CHECK-LABEL: select_andor:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    cmp w1, w2
-; CHECK-NEXT:    ccmp w0, #0, #4, lt
-; CHECK-NEXT:    ccmp w0, w1, #0, eq
-; CHECK-NEXT:    csel w0, w0, w1, eq
-; CHECK-NEXT:    ret
+; SDISEL-LABEL: select_andor:
+; SDISEL:       ; %bb.0:
+; SDISEL-NEXT:    cmp w1, w2
+; SDISEL-NEXT:    ccmp w0, #0, #4, lt
+; SDISEL-NEXT:    ccmp w0, w1, #0, eq
+; SDISEL-NEXT:    csel w0, w0, w1, eq
+; SDISEL-NEXT:    ret
+;
+; GISEL-LABEL: select_andor:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    cmp w0, w1
+; GISEL-NEXT:    csel w8, w0, w1, eq
+; GISEL-NEXT:    cmp w1, w2
+; GISEL-NEXT:    csel w9, w8, w1, ge
+; GISEL-NEXT:    cmp w0, #0
+; GISEL-NEXT:    csel w0, w8, w9, eq
+; GISEL-NEXT:    ret
   %c0 = icmp eq i32 %v1, %v2
   %c1 = icmp sge i32 %v2, %v3
   %c2 = icmp eq i32 %v1, 0
@@ -592,7 +619,7 @@ define i32 @select_andor32(i32 %v1, i32 %v2, i32 %v3) {
 ; SDISEL-LABEL: select_andor32:
 ; SDISEL:       ; %bb.0:
 ; SDISEL-NEXT:    cmp w1, w2
-; SDISEL-NEXT:    mov w8, #32
+; SDISEL-NEXT:    mov w8, #32 ; =0x20
 ; SDISEL-NEXT:    ccmp w0, w8, #4, lt
 ; SDISEL-NEXT:    ccmp w0, w1, #0, eq
 ; SDISEL-NEXT:    csel w0, w0, w1, eq
@@ -600,11 +627,12 @@ define i32 @select_andor32(i32 %v1, i32 %v2, i32 %v3) {
 ;
 ; GISEL-LABEL: select_andor32:
 ; GISEL:       ; %bb.0:
-; GISEL-NEXT:    mov w8, #32
+; GISEL-NEXT:    cmp w0, w1
+; GISEL-NEXT:    csel w8, w0, w1, eq
 ; GISEL-NEXT:    cmp w1, w2
-; GISEL-NEXT:    ccmp w0, w8, #4, lt
-; GISEL-NEXT:    ccmp w0, w1, #0, eq
-; GISEL-NEXT:    csel w0, w0, w1, eq
+; GISEL-NEXT:    csel w9, w8, w1, ge
+; GISEL-NEXT:    cmp w0, #32
+; GISEL-NEXT:    csel w0, w8, w9, eq
 ; GISEL-NEXT:    ret
   %c0 = icmp eq i32 %v1, %v2
   %c1 = icmp sge i32 %v2, %v3
@@ -630,19 +658,14 @@ define i64 @select_noccmp1(i64 %v1, i64 %v2, i64 %v3, i64 %r) {
 ;
 ; GISEL-LABEL: select_noccmp1:
 ; GISEL:       ; %bb.0:
-; GISEL-NEXT:    cmp x0, #0
-; GISEL-NEXT:    cset w8, lt
-; GISEL-NEXT:    cmp x0, #13
-; GISEL-NEXT:    cset w9, gt
-; GISEL-NEXT:    cmp x2, #2
-; GISEL-NEXT:    cset w10, lt
 ; GISEL-NEXT:    cmp x2, #4
-; GISEL-NEXT:    cset w11, gt
-; GISEL-NEXT:    and w8, w8, w9
-; GISEL-NEXT:    and w9, w10, w11
-; GISEL-NEXT:    orr w8, w8, w9
-; GISEL-NEXT:    tst w8, #0x1
-; GISEL-NEXT:    csel x0, xzr, x3, ne
+; GISEL-NEXT:    csel x8, xzr, x3, gt
+; GISEL-NEXT:    cmp x2, #2
+; GISEL-NEXT:    csel x8, x8, x3, lt
+; GISEL-NEXT:    cmp x0, #13
+; GISEL-NEXT:    csel x9, xzr, x8, gt
+; GISEL-NEXT:    cmp x0, #0
+; GISEL-NEXT:    csel x0, x9, x8, lt
 ; GISEL-NEXT:    ret
   %c0 = icmp slt i64 %v1, 0
   %c1 = icmp sgt i64 %v1, 13
@@ -676,10 +699,12 @@ define i64 @select_noccmp2(i64 %v1, i64 %v2, i64 %v3, i64 %r) {
 ; GISEL-NEXT:    cset w8, lt
 ; GISEL-NEXT:    cmp x0, #13
 ; GISEL-NEXT:    cset w9, gt
-; GISEL-NEXT:    orr w8, w8, w9
+; GISEL-NEXT:    orr w10, w8, w9
+; GISEL-NEXT:    tst w9, #0x1
+; GISEL-NEXT:    csel x9, xzr, x3, ne
 ; GISEL-NEXT:    tst w8, #0x1
-; GISEL-NEXT:    csel x0, xzr, x3, ne
-; GISEL-NEXT:    sbfx w8, w8, #0, #1
+; GISEL-NEXT:    csel x0, xzr, x9, ne
+; GISEL-NEXT:    sbfx w8, w10, #0, #1
 ; GISEL-NEXT:    adrp x9, _g at PAGE
 ; GISEL-NEXT:    str w8, [x9, _g at PAGEOFF]
 ; GISEL-NEXT:    ret
@@ -701,11 +726,11 @@ define i32 @select_noccmp3(i32 %v0, i32 %v1, i32 %v2) {
 ; SDISEL-NEXT:    ccmp w0, #13, #0, ge
 ; SDISEL-NEXT:    cset w8, gt
 ; SDISEL-NEXT:    cmp w0, #22
-; SDISEL-NEXT:    mov w9, #44
+; SDISEL-NEXT:    mov w9, #44 ; =0x2c
 ; SDISEL-NEXT:    ccmp w0, w9, #0, ge
 ; SDISEL-NEXT:    csel w8, wzr, w8, le
 ; SDISEL-NEXT:    cmp w0, #99
-; SDISEL-NEXT:    mov w9, #77
+; SDISEL-NEXT:    mov w9, #77 ; =0x4d
 ; SDISEL-NEXT:    ccmp w0, w9, #4, ne
 ; SDISEL-NEXT:    cset w9, eq
 ; SDISEL-NEXT:    tst w8, w9
@@ -714,25 +739,18 @@ define i32 @select_noccmp3(i32 %v0, i32 %v1, i32 %v2) {
 ;
 ; GISEL-LABEL: select_noccmp3:
 ; GISEL:       ; %bb.0:
-; GISEL-NEXT:    cmp w0, #0
-; GISEL-NEXT:    cset w8, lt
-; GISEL-NEXT:    cmp w0, #13
-; GISEL-NEXT:    cset w9, gt
-; GISEL-NEXT:    cmp w0, #22
-; GISEL-NEXT:    cset w10, lt
-; GISEL-NEXT:    cmp w0, #44
-; GISEL-NEXT:    cset w11, gt
-; GISEL-NEXT:    cmp w0, #99
-; GISEL-NEXT:    cset w12, eq
 ; GISEL-NEXT:    cmp w0, #77
-; GISEL-NEXT:    cset w13, eq
-; GISEL-NEXT:    orr w8, w8, w9
-; GISEL-NEXT:    orr w9, w10, w11
-; GISEL-NEXT:    and w8, w8, w9
-; GISEL-NEXT:    orr w9, w12, w13
-; GISEL-NEXT:    and w8, w8, w9
-; GISEL-NEXT:    tst w8, #0x1
-; GISEL-NEXT:    csel w0, w1, w2, ne
+; GISEL-NEXT:    csel w8, w1, w2, eq
+; GISEL-NEXT:    cmp w0, #99
+; GISEL-NEXT:    csel w8, w1, w8, eq
+; GISEL-NEXT:    cmp w0, #44
+; GISEL-NEXT:    csel w9, w8, w2, gt
+; GISEL-NEXT:    cmp w0, #22
+; GISEL-NEXT:    csel w8, w8, w9, lt
+; GISEL-NEXT:    cmp w0, #13
+; GISEL-NEXT:    csel w9, w8, w2, gt
+; GISEL-NEXT:    cmp w0, #0
+; GISEL-NEXT:    csel w0, w8, w9, lt
 ; GISEL-NEXT:    ret
   %c0 = icmp slt i32 %v0, 0
   %c1 = icmp sgt i32 %v0, 13
@@ -752,13 +770,25 @@ define i32 @select_noccmp3(i32 %v0, i32 %v1, i32 %v2) {
 ; Test the IR CCs that expand to two cond codes.
 
 define i32 @select_and_olt_one(double %v0, double %v1, double %v2, double %v3, i32 %a, i32 %b) #0 {
-; CHECK-LABEL: select_and_olt_one:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    fcmp d0, d1
-; CHECK-NEXT:    fccmp d2, d3, #4, mi
-; CHECK-NEXT:    fccmp d2, d3, #1, ne
-; CHECK-NEXT:    csel w0, w0, w1, vc
-; CHECK-NEXT:    ret
+; SDISEL-LABEL: select_and_olt_one:
+; SDISEL:       ; %bb.0:
+; SDISEL-NEXT:    fcmp d0, d1
+; SDISEL-NEXT:    fccmp d2, d3, #4, mi
+; SDISEL-NEXT:    fccmp d2, d3, #1, ne
+; SDISEL-NEXT:    csel w0, w0, w1, vc
+; SDISEL-NEXT:    ret
+;
+; GISEL-LABEL: select_and_olt_one:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    fcmp d2, d3
+; GISEL-NEXT:    cset w8, mi
+; GISEL-NEXT:    cset w9, gt
+; GISEL-NEXT:    orr w8, w8, w9
+; GISEL-NEXT:    fcmp d0, d1
+; GISEL-NEXT:    csel w9, w0, w1, mi
+; GISEL-NEXT:    tst w8, #0x1
+; GISEL-NEXT:    csel w0, w9, w1, ne
+; GISEL-NEXT:    ret
   %c0 = fcmp olt double %v0, %v1
   %c1 = fcmp one double %v2, %v3
   %cr = and i1 %c1, %c0
@@ -767,13 +797,25 @@ define i32 @select_and_olt_one(double %v0, double %v1, double %v2, double %v3, i
 }
 
 define i32 @select_and_one_olt(double %v0, double %v1, double %v2, double %v3, i32 %a, i32 %b) #0 {
-; CHECK-LABEL: select_and_one_olt:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    fcmp d0, d1
-; CHECK-NEXT:    fccmp d0, d1, #1, ne
-; CHECK-NEXT:    fccmp d2, d3, #0, vc
-; CHECK-NEXT:    csel w0, w0, w1, mi
-; CHECK-NEXT:    ret
+; SDISEL-LABEL: select_and_one_olt:
+; SDISEL:       ; %bb.0:
+; SDISEL-NEXT:    fcmp d0, d1
+; SDISEL-NEXT:    fccmp d0, d1, #1, ne
+; SDISEL-NEXT:    fccmp d2, d3, #0, vc
+; SDISEL-NEXT:    csel w0, w0, w1, mi
+; SDISEL-NEXT:    ret
+;
+; GISEL-LABEL: select_and_one_olt:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    fcmp d0, d1
+; GISEL-NEXT:    cset w8, mi
+; GISEL-NEXT:    cset w9, gt
+; GISEL-NEXT:    orr w8, w8, w9
+; GISEL-NEXT:    tst w8, #0x1
+; GISEL-NEXT:    csel w8, w0, w1, ne
+; GISEL-NEXT:    fcmp d2, d3
+; GISEL-NEXT:    csel w0, w8, w1, mi
+; GISEL-NEXT:    ret
   %c0 = fcmp one double %v0, %v1
   %c1 = fcmp olt double %v2, %v3
   %cr = and i1 %c1, %c0
@@ -782,13 +824,25 @@ define i32 @select_and_one_olt(double %v0, double %v1, double %v2, double %v3, i
 }
 
 define i32 @select_and_olt_ueq(double %v0, double %v1, double %v2, double %v3, i32 %a, i32 %b) #0 {
-; CHECK-LABEL: select_and_olt_ueq:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    fcmp d0, d1
-; CHECK-NEXT:    fccmp d2, d3, #0, mi
-; CHECK-NEXT:    fccmp d2, d3, #8, le
-; CHECK-NEXT:    csel w0, w0, w1, pl
-; CHECK-NEXT:    ret
+; SDISEL-LABEL: select_and_olt_ueq:
+; SDISEL:       ; %bb.0:
+; SDISEL-NEXT:    fcmp d0, d1
+; SDISEL-NEXT:    fccmp d2, d3, #0, mi
+; SDISEL-NEXT:    fccmp d2, d3, #8, le
+; SDISEL-NEXT:    csel w0, w0, w1, pl
+; SDISEL-NEXT:    ret
+;
+; GISEL-LABEL: select_and_olt_ueq:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    fcmp d2, d3
+; GISEL-NEXT:    cset w8, eq
+; GISEL-NEXT:    cset w9, vs
+; GISEL-NEXT:    orr w8, w8, w9
+; GISEL-NEXT:    fcmp d0, d1
+; GISEL-NEXT:    csel w9, w0, w1, mi
+; GISEL-NEXT:    tst w8, #0x1
+; GISEL-NEXT:    csel w0, w9, w1, ne
+; GISEL-NEXT:    ret
   %c0 = fcmp olt double %v0, %v1
   %c1 = fcmp ueq double %v2, %v3
   %cr = and i1 %c1, %c0
@@ -797,13 +851,25 @@ define i32 @select_and_olt_ueq(double %v0, double %v1, double %v2, double %v3, i
 }
 
 define i32 @select_and_ueq_olt(double %v0, double %v1, double %v2, double %v3, i32 %a, i32 %b) #0 {
-; CHECK-LABEL: select_and_ueq_olt:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    fcmp d0, d1
-; CHECK-NEXT:    fccmp d0, d1, #8, le
-; CHECK-NEXT:    fccmp d2, d3, #0, pl
-; CHECK-NEXT:    csel w0, w0, w1, mi
-; CHECK-NEXT:    ret
+; SDISEL-LABEL: select_and_ueq_olt:
+; SDISEL:       ; %bb.0:
+; SDISEL-NEXT:    fcmp d0, d1
+; SDISEL-NEXT:    fccmp d0, d1, #8, le
+; SDISEL-NEXT:    fccmp d2, d3, #0, pl
+; SDISEL-NEXT:    csel w0, w0, w1, mi
+; SDISEL-NEXT:    ret
+;
+; GISEL-LABEL: select_and_ueq_olt:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    fcmp d0, d1
+; GISEL-NEXT:    cset w8, eq
+; GISEL-NEXT:    cset w9, vs
+; GISEL-NEXT:    orr w8, w8, w9
+; GISEL-NEXT:    tst w8, #0x1
+; GISEL-NEXT:    csel w8, w0, w1, ne
+; GISEL-NEXT:    fcmp d2, d3
+; GISEL-NEXT:    csel w0, w8, w1, mi
+; GISEL-NEXT:    ret
   %c0 = fcmp ueq double %v0, %v1
   %c1 = fcmp olt double %v2, %v3
   %cr = and i1 %c1, %c0
@@ -812,13 +878,25 @@ define i32 @select_and_ueq_olt(double %v0, double %v1, double %v2, double %v3, i
 }
 
 define i32 @select_or_olt_one(double %v0, double %v1, double %v2, double %v3, i32 %a, i32 %b) #0 {
-; CHECK-LABEL: select_or_olt_one:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    fcmp d0, d1
-; CHECK-NEXT:    fccmp d2, d3, #0, pl
-; CHECK-NEXT:    fccmp d2, d3, #8, le
-; CHECK-NEXT:    csel w0, w0, w1, mi
-; CHECK-NEXT:    ret
+; SDISEL-LABEL: select_or_olt_one:
+; SDISEL:       ; %bb.0:
+; SDISEL-NEXT:    fcmp d0, d1
+; SDISEL-NEXT:    fccmp d2, d3, #0, pl
+; SDISEL-NEXT:    fccmp d2, d3, #8, le
+; SDISEL-NEXT:    csel w0, w0, w1, mi
+; SDISEL-NEXT:    ret
+;
+; GISEL-LABEL: select_or_olt_one:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    fcmp d2, d3
+; GISEL-NEXT:    cset w8, mi
+; GISEL-NEXT:    cset w9, gt
+; GISEL-NEXT:    orr w8, w8, w9
+; GISEL-NEXT:    fcmp d0, d1
+; GISEL-NEXT:    csel w9, w0, w1, mi
+; GISEL-NEXT:    tst w8, #0x1
+; GISEL-NEXT:    csel w0, w0, w9, ne
+; GISEL-NEXT:    ret
   %c0 = fcmp olt double %v0, %v1
   %c1 = fcmp one double %v2, %v3
   %cr = or i1 %c1, %c0
@@ -827,13 +905,25 @@ define i32 @select_or_olt_one(double %v0, double %v1, double %v2, double %v3, i3
 }
 
 define i32 @select_or_one_olt(double %v0, double %v1, double %v2, double %v3, i32 %a, i32 %b) #0 {
-; CHECK-LABEL: select_or_one_olt:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    fcmp d0, d1
-; CHECK-NEXT:    fccmp d0, d1, #8, le
-; CHECK-NEXT:    fccmp d2, d3, #8, pl
-; CHECK-NEXT:    csel w0, w0, w1, mi
-; CHECK-NEXT:    ret
+; SDISEL-LABEL: select_or_one_olt:
+; SDISEL:       ; %bb.0:
+; SDISEL-NEXT:    fcmp d0, d1
+; SDISEL-NEXT:    fccmp d0, d1, #8, le
+; SDISEL-NEXT:    fccmp d2, d3, #8, pl
+; SDISEL-NEXT:    csel w0, w0, w1, mi
+; SDISEL-NEXT:    ret
+;
+; GISEL-LABEL: select_or_one_olt:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    fcmp d0, d1
+; GISEL-NEXT:    cset w8, mi
+; GISEL-NEXT:    cset w9, gt
+; GISEL-NEXT:    orr w8, w8, w9
+; GISEL-NEXT:    tst w8, #0x1
+; GISEL-NEXT:    csel w8, w0, w1, ne
+; GISEL-NEXT:    fcmp d2, d3
+; GISEL-NEXT:    csel w0, w0, w8, mi
+; GISEL-NEXT:    ret
   %c0 = fcmp one double %v0, %v1
   %c1 = fcmp olt double %v2, %v3
   %cr = or i1 %c1, %c0
@@ -842,13 +932,25 @@ define i32 @select_or_one_olt(double %v0, double %v1, double %v2, double %v3, i3
 }
 
 define i32 @select_or_olt_ueq(double %v0, double %v1, double %v2, double %v3, i32 %a, i32 %b) #0 {
-; CHECK-LABEL: select_or_olt_ueq:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    fcmp d0, d1
-; CHECK-NEXT:    fccmp d2, d3, #4, pl
-; CHECK-NEXT:    fccmp d2, d3, #1, ne
-; CHECK-NEXT:    csel w0, w0, w1, vs
-; CHECK-NEXT:    ret
+; SDISEL-LABEL: select_or_olt_ueq:
+; SDISEL:       ; %bb.0:
+; SDISEL-NEXT:    fcmp d0, d1
+; SDISEL-NEXT:    fccmp d2, d3, #4, pl
+; SDISEL-NEXT:    fccmp d2, d3, #1, ne
+; SDISEL-NEXT:    csel w0, w0, w1, vs
+; SDISEL-NEXT:    ret
+;
+; GISEL-LABEL: select_or_olt_ueq:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    fcmp d2, d3
+; GISEL-NEXT:    cset w8, eq
+; GISEL-NEXT:    cset w9, vs
+; GISEL-NEXT:    orr w8, w8, w9
+; GISEL-NEXT:    fcmp d0, d1
+; GISEL-NEXT:    csel w9, w0, w1, mi
+; GISEL-NEXT:    tst w8, #0x1
+; GISEL-NEXT:    csel w0, w0, w9, ne
+; GISEL-NEXT:    ret
   %c0 = fcmp olt double %v0, %v1
   %c1 = fcmp ueq double %v2, %v3
   %cr = or i1 %c1, %c0
@@ -857,13 +959,25 @@ define i32 @select_or_olt_ueq(double %v0, double %v1, double %v2, double %v3, i3
 }
 
 define i32 @select_or_ueq_olt(double %v0, double %v1, double %v2, double %v3, i32 %a, i32 %b) #0 {
-; CHECK-LABEL: select_or_ueq_olt:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    fcmp d0, d1
-; CHECK-NEXT:    fccmp d0, d1, #1, ne
-; CHECK-NEXT:    fccmp d2, d3, #8, vc
-; CHECK-NEXT:    csel w0, w0, w1, mi
-; CHECK-NEXT:    ret
+; SDISEL-LABEL: select_or_ueq_olt:
+; SDISEL:       ; %bb.0:
+; SDISEL-NEXT:    fcmp d0, d1
+; SDISEL-NEXT:    fccmp d0, d1, #1, ne
+; SDISEL-NEXT:    fccmp d2, d3, #8, vc
+; SDISEL-NEXT:    csel w0, w0, w1, mi
+; SDISEL-NEXT:    ret
+;
+; GISEL-LABEL: select_or_ueq_olt:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    fcmp d0, d1
+; GISEL-NEXT:    cset w8, eq
+; GISEL-NEXT:    cset w9, vs
+; GISEL-NEXT:    orr w8, w8, w9
+; GISEL-NEXT:    tst w8, #0x1
+; GISEL-NEXT:    csel w8, w0, w1, ne
+; GISEL-NEXT:    fcmp d2, d3
+; GISEL-NEXT:    csel w0, w0, w8, mi
+; GISEL-NEXT:    ret
   %c0 = fcmp ueq double %v0, %v1
   %c1 = fcmp olt double %v2, %v3
   %cr = or i1 %c1, %c0
@@ -872,14 +986,28 @@ define i32 @select_or_ueq_olt(double %v0, double %v1, double %v2, double %v3, i3
 }
 
 define i32 @select_or_olt_ogt_ueq(double %v0, double %v1, double %v2, double %v3, double %v4, double %v5, i32 %a, i32 %b) #0 {
-; CHECK-LABEL: select_or_olt_ogt_ueq:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    fcmp d0, d1
-; CHECK-NEXT:    fccmp d2, d3, #0, pl
-; CHECK-NEXT:    fccmp d4, d5, #4, le
-; CHECK-NEXT:    fccmp d4, d5, #1, ne
-; CHECK-NEXT:    csel w0, w0, w1, vs
-; CHECK-NEXT:    ret
+; SDISEL-LABEL: select_or_olt_ogt_ueq:
+; SDISEL:       ; %bb.0:
+; SDISEL-NEXT:    fcmp d0, d1
+; SDISEL-NEXT:    fccmp d2, d3, #0, pl
+; SDISEL-NEXT:    fccmp d4, d5, #4, le
+; SDISEL-NEXT:    fccmp d4, d5, #1, ne
+; SDISEL-NEXT:    csel w0, w0, w1, vs
+; SDISEL-NEXT:    ret
+;
+; GISEL-LABEL: select_or_olt_ogt_ueq:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    fcmp d4, d5
+; GISEL-NEXT:    cset w8, eq
+; GISEL-NEXT:    cset w9, vs
+; GISEL-NEXT:    orr w8, w8, w9
+; GISEL-NEXT:    fcmp d0, d1
+; GISEL-NEXT:    csel w9, w0, w1, mi
+; GISEL-NEXT:    fcmp d2, d3
+; GISEL-NEXT:    csel w9, w0, w9, gt
+; GISEL-NEXT:    tst w8, #0x1
+; GISEL-NEXT:    csel w0, w0, w9, ne
+; GISEL-NEXT:    ret
   %c0 = fcmp olt double %v0, %v1
   %c1 = fcmp ogt double %v2, %v3
   %c2 = fcmp ueq double %v4, %v5
@@ -890,14 +1018,28 @@ define i32 @select_or_olt_ogt_ueq(double %v0, double %v1, double %v2, double %v3
 }
 
 define i32 @select_or_olt_ueq_ogt(double %v0, double %v1, double %v2, double %v3, double %v4, double %v5, i32 %a, i32 %b) #0 {
-; CHECK-LABEL: select_or_olt_ueq_ogt:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    fcmp d0, d1
-; CHECK-NEXT:    fccmp d2, d3, #4, pl
-; CHECK-NEXT:    fccmp d2, d3, #1, ne
-; CHECK-NEXT:    fccmp d4, d5, #0, vc
-; CHECK-NEXT:    csel w0, w0, w1, gt
-; CHECK-NEXT:    ret
+; SDISEL-LABEL: select_or_olt_ueq_ogt:
+; SDISEL:       ; %bb.0:
+; SDISEL-NEXT:    fcmp d0, d1
+; SDISEL-NEXT:    fccmp d2, d3, #4, pl
+; SDISEL-NEXT:    fccmp d2, d3, #1, ne
+; SDISEL-NEXT:    fccmp d4, d5, #0, vc
+; SDISEL-NEXT:    csel w0, w0, w1, gt
+; SDISEL-NEXT:    ret
+;
+; GISEL-LABEL: select_or_olt_ueq_ogt:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    fcmp d2, d3
+; GISEL-NEXT:    cset w8, eq
+; GISEL-NEXT:    cset w9, vs
+; GISEL-NEXT:    orr w8, w8, w9
+; GISEL-NEXT:    fcmp d0, d1
+; GISEL-NEXT:    csel w9, w0, w1, mi
+; GISEL-NEXT:    tst w8, #0x1
+; GISEL-NEXT:    csel w8, w0, w9, ne
+; GISEL-NEXT:    fcmp d4, d5
+; GISEL-NEXT:    csel w0, w0, w8, gt
+; GISEL-NEXT:    ret
   %c0 = fcmp olt double %v0, %v1
   %c1 = fcmp ueq double %v2, %v3
   %c2 = fcmp ogt double %v4, %v5
@@ -928,8 +1070,9 @@ define i32 @half_select_and_olt_oge(half %v0, half %v1, half %v2, half %v3, i32
 ; GISEL-NEXT:    fcvt s2, h2
 ; GISEL-NEXT:    fcvt s3, h3
 ; GISEL-NEXT:    fcmp s0, s1
-; GISEL-NEXT:    fccmp s2, s3, #8, mi
-; GISEL-NEXT:    csel w0, w0, w1, ge
+; GISEL-NEXT:    csel w8, w0, w1, mi
+; GISEL-NEXT:    fcmp s2, s3
+; GISEL-NEXT:    csel w0, w8, w1, ge
 ; GISEL-NEXT:    ret
   %c0 = fcmp olt half %v0, %v1
   %c1 = fcmp oge half %v2, %v3
@@ -957,10 +1100,14 @@ define i32 @half_select_and_olt_one(half %v0, half %v1, half %v2, half %v3, i32
 ; GISEL-NEXT:    fcvt s1, h1
 ; GISEL-NEXT:    fcvt s2, h2
 ; GISEL-NEXT:    fcvt s3, h3
+; GISEL-NEXT:    fcmp s2, s3
+; GISEL-NEXT:    cset w8, mi
+; GISEL-NEXT:    cset w9, gt
+; GISEL-NEXT:    orr w8, w8, w9
 ; GISEL-NEXT:    fcmp s0, s1
-; GISEL-NEXT:    fccmp s2, s3, #4, mi
-; GISEL-NEXT:    fccmp s2, s3, #1, ne
-; GISEL-NEXT:    csel w0, w0, w1, vc
+; GISEL-NEXT:    csel w9, w0, w1, mi
+; GISEL-NEXT:    tst w8, #0x1
+; GISEL-NEXT:    csel w0, w9, w1, ne
 ; GISEL-NEXT:    ret
   %c0 = fcmp olt half %v0, %v1
   %c1 = fcmp one half %v2, %v3
@@ -1005,14 +1152,26 @@ define i32 @f128_select_and_olt_oge(fp128 %v0, fp128 %v1, fp128 %v2, fp128 %v3,
 ; This testcase resembles the core problem of http://llvm.org/PR39550
 ; (an OR operation is 2 levels deep but needs to be implemented first)
 define i32 @deep_or(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %x, i32 %y) {
-; CHECK-LABEL: deep_or:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    cmp w2, #20
-; CHECK-NEXT:    ccmp w2, #15, #4, ne
-; CHECK-NEXT:    ccmp w1, #0, #4, eq
-; CHECK-NEXT:    ccmp w0, #0, #4, ne
-; CHECK-NEXT:    csel w0, w4, w5, ne
-; CHECK-NEXT:    ret
+; SDISEL-LABEL: deep_or:
+; SDISEL:       ; %bb.0:
+; SDISEL-NEXT:    cmp w2, #20
+; SDISEL-NEXT:    ccmp w2, #15, #4, ne
+; SDISEL-NEXT:    ccmp w1, #0, #4, eq
+; SDISEL-NEXT:    ccmp w0, #0, #4, ne
+; SDISEL-NEXT:    csel w0, w4, w5, ne
+; SDISEL-NEXT:    ret
+;
+; GISEL-LABEL: deep_or:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    cmp w0, #0
+; GISEL-NEXT:    csel w8, w4, w5, ne
+; GISEL-NEXT:    cmp w1, #0
+; GISEL-NEXT:    csel w8, w8, w5, ne
+; GISEL-NEXT:    cmp w2, #20
+; GISEL-NEXT:    csel w9, w8, w5, eq
+; GISEL-NEXT:    cmp w2, #15
+; GISEL-NEXT:    csel w0, w8, w9, eq
+; GISEL-NEXT:    ret
   %c0 = icmp ne i32 %a0, 0
   %c1 = icmp ne i32 %a1, 0
   %c2 = icmp eq i32 %a2, 15
@@ -1027,14 +1186,26 @@ define i32 @deep_or(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %x, i32 %y) {
 
 ; Variation of deep_or, we still need to implement the OR first though.
 define i32 @deep_or1(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %x, i32 %y) {
-; CHECK-LABEL: deep_or1:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    cmp w2, #20
-; CHECK-NEXT:    ccmp w2, #15, #4, ne
-; CHECK-NEXT:    ccmp w0, #0, #4, eq
-; CHECK-NEXT:    ccmp w1, #0, #4, ne
-; CHECK-NEXT:    csel w0, w4, w5, ne
-; CHECK-NEXT:    ret
+; SDISEL-LABEL: deep_or1:
+; SDISEL:       ; %bb.0:
+; SDISEL-NEXT:    cmp w2, #20
+; SDISEL-NEXT:    ccmp w2, #15, #4, ne
+; SDISEL-NEXT:    ccmp w0, #0, #4, eq
+; SDISEL-NEXT:    ccmp w1, #0, #4, ne
+; SDISEL-NEXT:    csel w0, w4, w5, ne
+; SDISEL-NEXT:    ret
+;
+; GISEL-LABEL: deep_or1:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    cmp w1, #0
+; GISEL-NEXT:    csel w8, w4, w5, ne
+; GISEL-NEXT:    cmp w2, #20
+; GISEL-NEXT:    csel w9, w8, w5, eq
+; GISEL-NEXT:    cmp w2, #15
+; GISEL-NEXT:    csel w8, w8, w9, eq
+; GISEL-NEXT:    cmp w0, #0
+; GISEL-NEXT:    csel w0, w8, w5, ne
+; GISEL-NEXT:    ret
   %c0 = icmp ne i32 %a0, 0
   %c1 = icmp ne i32 %a1, 0
   %c2 = icmp eq i32 %a2, 15
@@ -1049,14 +1220,26 @@ define i32 @deep_or1(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %x, i32 %y) {
 
 ; Variation of deep_or, we still need to implement the OR first though.
 define i32 @deep_or2(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %x, i32 %y) {
-; CHECK-LABEL: deep_or2:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    cmp w2, #20
-; CHECK-NEXT:    ccmp w2, #15, #4, ne
-; CHECK-NEXT:    ccmp w1, #0, #4, eq
-; CHECK-NEXT:    ccmp w0, #0, #4, ne
-; CHECK-NEXT:    csel w0, w4, w5, ne
-; CHECK-NEXT:    ret
+; SDISEL-LABEL: deep_or2:
+; SDISEL:       ; %bb.0:
+; SDISEL-NEXT:    cmp w2, #20
+; SDISEL-NEXT:    ccmp w2, #15, #4, ne
+; SDISEL-NEXT:    ccmp w1, #0, #4, eq
+; SDISEL-NEXT:    ccmp w0, #0, #4, ne
+; SDISEL-NEXT:    csel w0, w4, w5, ne
+; SDISEL-NEXT:    ret
+;
+; GISEL-LABEL: deep_or2:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    cmp w2, #20
+; GISEL-NEXT:    csel w8, w4, w5, eq
+; GISEL-NEXT:    cmp w2, #15
+; GISEL-NEXT:    csel w8, w4, w8, eq
+; GISEL-NEXT:    cmp w1, #0
+; GISEL-NEXT:    csel w8, w8, w5, ne
+; GISEL-NEXT:    cmp w0, #0
+; GISEL-NEXT:    csel w0, w8, w5, ne
+; GISEL-NEXT:    ret
   %c0 = icmp ne i32 %a0, 0
   %c1 = icmp ne i32 %a1, 0
   %c2 = icmp eq i32 %a2, 15
@@ -1095,22 +1278,29 @@ define i32 @multiccmp(i32 %s0, i32 %s1, i32 %s2, i32 %s3, i32 %x, i32 %y) #0 {
 ;
 ; GISEL-LABEL: multiccmp:
 ; GISEL:       ; %bb.0: ; %entry
-; GISEL-NEXT:    stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill
-; GISEL-NEXT:    stp x29, x30, [sp, #16] ; 16-byte Folded Spill
-; GISEL-NEXT:    mov x19, x5
-; GISEL-NEXT:    cmp w0, w1
-; GISEL-NEXT:    cset w8, gt
+; GISEL-NEXT:    stp x24, x23, [sp, #-64]! ; 16-byte Folded Spill
+; GISEL-NEXT:    stp x22, x21, [sp, #16] ; 16-byte Folded Spill
+; GISEL-NEXT:    stp x20, x19, [sp, #32] ; 16-byte Folded Spill
+; GISEL-NEXT:    stp x29, x30, [sp, #48] ; 16-byte Folded Spill
+; GISEL-NEXT:    mov x19, x0
+; GISEL-NEXT:    mov x20, x1
+; GISEL-NEXT:    mov x21, x2
+; GISEL-NEXT:    mov x22, x3
+; GISEL-NEXT:    mov x23, x5
 ; GISEL-NEXT:    cmp w2, w3
-; GISEL-NEXT:    cset w9, ne
-; GISEL-NEXT:    and w20, w8, w9
-; GISEL-NEXT:    tst w20, #0x1
-; GISEL-NEXT:    csel w0, w5, w4, ne
+; GISEL-NEXT:    csel w8, w5, w4, ne
+; GISEL-NEXT:    cmp w0, w1
+; GISEL-NEXT:    csel w0, w8, w4, gt
 ; GISEL-NEXT:    bl _callee
-; GISEL-NEXT:    tst w20, #0x1
-; GISEL-NEXT:    csel w0, w0, w19, ne
+; GISEL-NEXT:    cmp w21, w22
+; GISEL-NEXT:    csel w8, w0, w23, ne
+; GISEL-NEXT:    cmp w19, w20
+; GISEL-NEXT:    csel w0, w8, w23, gt
 ; GISEL-NEXT:    bl _callee
-; GISEL-NEXT:    ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
-; GISEL-NEXT:    ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
+; GISEL-NEXT:    ldp x29, x30, [sp, #48] ; 16-byte Folded Reload
+; GISEL-NEXT:    ldp x20, x19, [sp, #32] ; 16-byte Folded Reload
+; GISEL-NEXT:    ldp x22, x21, [sp, #16] ; 16-byte Folded Reload
+; GISEL-NEXT:    ldp x24, x23, [sp], #64 ; 16-byte Folded Reload
 ; GISEL-NEXT:    ret
 entry:
   %c0 = icmp sgt i32 %s0, %s1
@@ -1155,21 +1345,18 @@ define i32 @multiccmp2(i32 %s0, i32 %s1, i32 %s2, i32 %s3, i32 %x, i32 %y) #0 {
 ; GISEL-NEXT:    stp x20, x19, [sp, #16] ; 16-byte Folded Spill
 ; GISEL-NEXT:    stp x29, x30, [sp, #32] ; 16-byte Folded Spill
 ; GISEL-NEXT:    mov x19, x0
-; GISEL-NEXT:    mov x20, x3
-; GISEL-NEXT:    mov x21, x5
-; GISEL-NEXT:    cmp w0, w1
-; GISEL-NEXT:    cset w8, gt
+; GISEL-NEXT:    mov x20, x2
+; GISEL-NEXT:    mov x21, x3
+; GISEL-NEXT:    mov x22, x5
 ; GISEL-NEXT:    cmp w2, w3
-; GISEL-NEXT:    cset w22, ne
-; GISEL-NEXT:    and w8, w8, w22
-; GISEL-NEXT:    tst w8, #0x1
-; GISEL-NEXT:    csel w0, w5, w4, ne
+; GISEL-NEXT:    csel w8, w5, w4, ne
+; GISEL-NEXT:    cmp w0, w1
+; GISEL-NEXT:    csel w0, w8, w4, gt
 ; GISEL-NEXT:    bl _callee
-; GISEL-NEXT:    cmp w19, w20
-; GISEL-NEXT:    cset w8, eq
-; GISEL-NEXT:    and w8, w22, w8
-; GISEL-NEXT:    tst w8, #0x1
-; GISEL-NEXT:    csel w0, w0, w21, ne
+; GISEL-NEXT:    cmp w19, w21
+; GISEL-NEXT:    csel w8, w0, w22, eq
+; GISEL-NEXT:    cmp w20, w21
+; GISEL-NEXT:    csel w0, w8, w22, ne
 ; GISEL-NEXT:    bl _callee
 ; GISEL-NEXT:    ldp x29, x30, [sp, #32] ; 16-byte Folded Reload
 ; GISEL-NEXT:    ldp x20, x19, [sp, #16] ; 16-byte Folded Reload
diff --git a/llvm/test/CodeGen/AArch64/dag-combine-select.ll b/llvm/test/CodeGen/AArch64/dag-combine-select.ll
index 56208f19782ce..3d6ee1035e4e4 100644
--- a/llvm/test/CodeGen/AArch64/dag-combine-select.ll
+++ b/llvm/test/CodeGen/AArch64/dag-combine-select.ll
@@ -32,36 +32,18 @@ define i32 @test0(i32 %v0, i32 %v1, i32 %v2) {
 ; sequences. This case should be transformed to select(C0, select(C1, x, y), y)
 ; anyway to get CSE effects.
 define void @test1(i32 %bitset, i32 %val0, i32 %val1) {
-; SDISEL-LABEL: test1:
-; SDISEL:       // %bb.0:
-; SDISEL-NEXT:    cmp w0, #7
-; SDISEL-NEXT:    adrp x9, out
-; SDISEL-NEXT:    csel w8, w1, w2, eq
-; SDISEL-NEXT:    cmp w8, #13
-; SDISEL-NEXT:    csel w8, w1, w2, lo
-; SDISEL-NEXT:    cmp w0, #42
-; SDISEL-NEXT:    csel w10, w1, w8, eq
-; SDISEL-NEXT:    str w8, [x9, :lo12:out]
-; SDISEL-NEXT:    str w10, [x9, :lo12:out]
-; SDISEL-NEXT:    ret
-;
-; GISEL-LABEL: test1:
-; GISEL:       // %bb.0:
-; GISEL-NEXT:    cmp w0, #7
-; GISEL-NEXT:    csel w8, w1, w2, eq
-; GISEL-NEXT:    cmp w8, #13
-; GISEL-NEXT:    cset w8, lo
-; GISEL-NEXT:    tst w8, #0x1
-; GISEL-NEXT:    csel w9, w1, w2, ne
-; GISEL-NEXT:    cmp w0, #42
-; GISEL-NEXT:    cset w10, eq
-; GISEL-NEXT:    orr w8, w10, w8
-; GISEL-NEXT:    tst w8, #0x1
-; GISEL-NEXT:    adrp x8, out
-; GISEL-NEXT:    csel w10, w1, w2, ne
-; GISEL-NEXT:    str w9, [x8, :lo12:out]
-; GISEL-NEXT:    str w10, [x8, :lo12:out]
-; GISEL-NEXT:    ret
+; CHECK-LABEL: test1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmp w0, #7
+; CHECK-NEXT:    adrp x9, out
+; CHECK-NEXT:    csel w8, w1, w2, eq
+; CHECK-NEXT:    cmp w8, #13
+; CHECK-NEXT:    csel w8, w1, w2, lo
+; CHECK-NEXT:    cmp w0, #42
+; CHECK-NEXT:    csel w10, w1, w8, eq
+; CHECK-NEXT:    str w8, [x9, :lo12:out]
+; CHECK-NEXT:    str w10, [x9, :lo12:out]
+; CHECK-NEXT:    ret
   %cmp1 = icmp eq i32 %bitset, 7
   %cond = select i1 %cmp1, i32 %val0, i32 %val1
   %cmp5 = icmp ult i32 %cond, 13
@@ -73,5 +55,3 @@ define void @test1(i32 %bitset, i32 %val0, i32 %val1) {
   store volatile i32 %cond17, ptr @out, align 4
   ret void
 }
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; CHECK: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/bool-legalization.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/bool-legalization.ll
index aeb6d7d9dc714..b64cc9070f86a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/bool-legalization.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/bool-legalization.ll
@@ -27,16 +27,20 @@ define amdgpu_ps float @select_vgpr_sgpr_trunc_cond(i32 inreg %a, i32 %b, i32 %c
 define amdgpu_ps float @select_vgpr_sgpr_trunc_and_cond(i32 inreg %a.0, i32 inreg %a.1, i32 %b, i32 %c) {
 ; WAVE64-LABEL: select_vgpr_sgpr_trunc_and_cond:
 ; WAVE64:       ; %bb.0:
-; WAVE64-NEXT:    s_and_b32 s0, s0, s1
+; WAVE64-NEXT:    s_and_b32 s1, 1, s1
+; WAVE64-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s1
 ; WAVE64-NEXT:    s_and_b32 s0, 1, s0
+; WAVE64-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
 ; WAVE64-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s0
 ; WAVE64-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
 ; WAVE64-NEXT:    ; return to shader part epilog
 ;
 ; WAVE32-LABEL: select_vgpr_sgpr_trunc_and_cond:
 ; WAVE32:       ; %bb.0:
-; WAVE32-NEXT:    s_and_b32 s0, s0, s1
+; WAVE32-NEXT:    s_and_b32 s1, 1, s1
 ; WAVE32-NEXT:    s_and_b32 s0, 1, s0
+; WAVE32-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s1
+; WAVE32-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc_lo
 ; WAVE32-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s0
 ; WAVE32-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc_lo
 ; WAVE32-NEXT:    ; return to shader part epilog
@@ -51,10 +55,12 @@ define amdgpu_ps float @select_vgpr_sgpr_trunc_and_cond(i32 inreg %a.0, i32 inre
 define amdgpu_ps i32 @select_sgpr_trunc_and_cond(i32 inreg %a.0, i32 inreg %a.1, i32 inreg %b, i32 inreg %c) {
 ; GCN-LABEL: select_sgpr_trunc_and_cond:
 ; GCN:       ; %bb.0:
-; GCN-NEXT:    s_and_b32 s0, s0, s1
+; GCN-NEXT:    s_and_b32 s1, s1, 1
+; GCN-NEXT:    s_cmp_lg_u32 s1, 0
+; GCN-NEXT:    s_cselect_b32 s1, s2, s3
 ; GCN-NEXT:    s_and_b32 s0, s0, 1
 ; GCN-NEXT:    s_cmp_lg_u32 s0, 0
-; GCN-NEXT:    s_cselect_b32 s0, s2, s3
+; GCN-NEXT:    s_cselect_b32 s0, s1, s3
 ; GCN-NEXT:    ; return to shader part epilog
   %cc.0 = trunc i32 %a.0 to i1
   %cc.1 = trunc i32 %a.1 to i1

>From 46205a730f6a6d8d68d4de17bd0a8c963d0d5695 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= <schuett at gmail.com>
Date: Sun, 3 Dec 2023 10:46:57 +0100
Subject: [PATCH 2/2] fix xor check and mark corresponding test as failed

---
 llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp          | 6 +++---
 llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir | 8 +++++---
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index d83215dcc83b4..2a4f5da863a72 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -6269,10 +6269,10 @@ bool CombinerHelper::tryCombineSelectLogical(GSelect *Select,
       GXor *Xor = cast<GXor>(CondDef);
       auto IConstant =
           getIConstantVRegValWithLookThrough(Xor->getRHSReg(), MRI);
-      if (IConstant && IConstant->Value == 1) {
-        // transform: select (xor Cond0, 1), X, Y
+      if (IConstant && IConstant->Value == -1) {
+        // transform: select (xor Cond0, -1), X, Y
         // to:        select Cond0 Y, X
-        // a xor 1 reads like not a.
+        // a xor -1 reads like not a.
         MatchInfo = [=](MachineIRBuilder &B) {
           B.setInstrAndDebugLoc(*Select);
           B.buildSelect(DstReg, Xor->getLHSReg(), Select->getFalseReg(),
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir
index a60148ffe5716..42acdaa2455d2 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir
@@ -365,11 +365,11 @@ body:             |
     $w0 = COPY %ext(s32)
 ...
 ---
-name:            xor_cond
+name:            xor_cond_failed
 body:             |
   bb.1:
     liveins: $x0, $x1, $x2
-    ; CHECK-LABEL: name: xor_cond
+    ; CHECK-LABEL: name: xor_cond_failed
     ; CHECK: liveins: $x0, $x1, $x2
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
@@ -378,7 +378,9 @@ body:             |
     ; CHECK-NEXT: %x:_(s1) = G_TRUNC [[COPY]](s64)
     ; CHECK-NEXT: %t:_(s1) = G_TRUNC [[COPY1]](s64)
     ; CHECK-NEXT: %f:_(s1) = G_TRUNC [[COPY2]](s64)
-    ; CHECK-NEXT: %sel:_(s1) = G_SELECT %x(s1), %f, %t
+    ; CHECK-NEXT: %one:_(s1) = G_CONSTANT i1 true
+    ; CHECK-NEXT: %cond:_(s1) = G_XOR %x, %one
+    ; CHECK-NEXT: %sel:_(s1) = G_SELECT %cond(s1), %t, %f
     ; CHECK-NEXT: %ext:_(s32) = G_ANYEXT %sel(s1)
     ; CHECK-NEXT: $w0 = COPY %ext(s32)
     %0:_(s64) = COPY $x0



More information about the llvm-commits mailing list