[llvm] [GlobalIsel] Combine select to integer minmax (second attempt). (PR #77520)

Thorsten Schütt via llvm-commits llvm-commits at lists.llvm.org
Tue Jan 9 12:48:37 PST 2024


https://github.com/tschuett created https://github.com/llvm/llvm-project/pull/77520

InstCombine canonicalizes selects to floating-point and integer minmax. This combiner and the DAG combiner canonicalize selects to floating-point minmax, but neither canonicalizes to integer minmax. On Neoverse V2, basic integer arithmetic and integer minmax have the same cost.

>From 11170008c586bbaabf2572f5540aec48853b4631 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= <schuett at gmail.com>
Date: Tue, 9 Jan 2024 21:46:23 +0100
Subject: [PATCH] [GlobalIsel] Combine select to integer minmax (second
 attempt).

InstCombine canonicalizes selects to floating-point and integer
minmax. This combiner and the DAG combiner canonicalize selects to
floating-point minmax, but neither canonicalizes to integer minmax.
On Neoverse V2, basic integer arithmetic and integer minmax have the
same cost.
---
 .../llvm/CodeGen/GlobalISel/CombinerHelper.h  |   3 +
 .../lib/CodeGen/GlobalISel/CombinerHelper.cpp |  84 ++++++
 .../AArch64/GlobalISel/arm64-atomic.ll        |  32 +-
 .../AArch64/GlobalISel/arm64-pcsections.ll    |   8 +-
 .../AArch64/GlobalISel/combine-select.mir     | 281 ++++++++++++++++++
 5 files changed, 388 insertions(+), 20 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index dcc1a4580b14a2..a6e9406bed06a2 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -910,6 +910,9 @@ class CombinerHelper {
 
   bool tryFoldSelectOfConstants(GSelect *Select, BuildFnTy &MatchInfo);
 
+  /// Try to fold (icmp X, Y) ? X : Y -> integer minmax.
+  bool tryFoldSelectToIntMinMax(GSelect *Select, BuildFnTy &MatchInfo);
+
   bool isOneOrOneSplat(Register Src, bool AllowUndefs);
   bool isZeroOrZeroSplat(Register Src, bool AllowUndefs);
   bool isConstantSplatVector(Register Src, int64_t SplatValue,
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 8b15bdb0aca30b..fc2793bd7a133d 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -6548,6 +6548,87 @@ bool CombinerHelper::tryFoldBoolSelectToLogic(GSelect *Select,
   return false;
 }
 
+bool CombinerHelper::tryFoldSelectToIntMinMax(GSelect *Select,
+                                              BuildFnTy &MatchInfo) {
+  Register DstReg = Select->getReg(0);
+  Register Cond = Select->getCondReg();
+  Register True = Select->getTrueReg();
+  Register False = Select->getFalseReg();
+  LLT DstTy = MRI.getType(DstReg);
+  // NOTE(review): pointer DstTy is not rejected here; confirm that is intended.
+  // The condition register must be defined by a G_ICMP.
+  GICmp *Cmp = getOpcodeDef<GICmp>(Cond, MRI);
+  if (!Cmp)
+    return false;
+
+  // Only fold when the select is the icmp's sole (non-debug) user.
+  if (!MRI.hasOneNonDBGUse(Cmp->getReg(0)))
+    return false;
+
+  CmpInst::Predicate Pred = Cmp->getCond();
+  // Only ordering predicates (gt/ge/lt/le) map onto min/max;
+  // eq and ne are rejected.
+  if (CmpInst::isEquality(Pred))
+    return false;
+
+  Register CmpLHS = Cmp->getLHSReg();
+  Register CmpRHS = Cmp->getRHSReg();
+
+  // Commute the compare if its operands mirror the select's (higher hit rate).
+  if (True == CmpRHS && False == CmpLHS) {
+    std::swap(CmpLHS, CmpRHS);
+    Pred = CmpInst::getSwappedPredicate(Pred);
+  }
+
+  // (icmp Pred X, Y) ? X : Y -> integer min/max of X and Y.
+  // See matchSelectPattern in ValueTracking.
+  // Legality of G_SELECT and of the min/max opcodes can differ; check each.
+  if (True == CmpLHS && False == CmpRHS) {
+    switch (Pred) {
+    case ICmpInst::ICMP_UGT:
+    case ICmpInst::ICMP_UGE: {
+      if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMAX, DstTy}))
+        return false;
+      MatchInfo = [=](MachineIRBuilder &B) {
+        B.buildUMax(DstReg, True, False);
+      };
+      return true;
+    }
+    case ICmpInst::ICMP_SGT:
+    case ICmpInst::ICMP_SGE: {
+      if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SMAX, DstTy}))
+        return false;
+      MatchInfo = [=](MachineIRBuilder &B) {
+        B.buildSMax(DstReg, True, False);
+      };
+      return true;
+    }
+    case ICmpInst::ICMP_ULT:
+    case ICmpInst::ICMP_ULE: {
+      if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMIN, DstTy}))
+        return false;
+      MatchInfo = [=](MachineIRBuilder &B) {
+        B.buildUMin(DstReg, True, False);
+      };
+      return true;
+    }
+    case ICmpInst::ICMP_SLT:
+    case ICmpInst::ICMP_SLE: {
+      if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SMIN, DstTy}))
+        return false;
+      MatchInfo = [=](MachineIRBuilder &B) {
+        B.buildSMin(DstReg, True, False);
+      };
+      return true;
+    }
+    default:
+      return false;
+    }
+  }
+
+  return false;
+}
+
 bool CombinerHelper::matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo) {
   GSelect *Select = cast<GSelect>(&MI);
 
@@ -6557,5 +6638,8 @@ bool CombinerHelper::matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo) {
   if (tryFoldBoolSelectToLogic(Select, MatchInfo))
     return true;
 
+  if (tryFoldSelectToIntMinMax(Select, MatchInfo))
+    return true;
+
   return false;
 }
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
index 739332414c1985..0e9c126e97a3d8 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
@@ -2421,7 +2421,7 @@ define i8 @atomicrmw_min_i8(ptr %ptr, i8 %rhs) {
 ; CHECK-NOLSE-O1-NEXT:    ldaxrb w8, [x0]
 ; CHECK-NOLSE-O1-NEXT:    sxtb w9, w8
 ; CHECK-NOLSE-O1-NEXT:    cmp w9, w1, sxtb
-; CHECK-NOLSE-O1-NEXT:    csel w9, w8, w1, le
+; CHECK-NOLSE-O1-NEXT:    csel w9, w8, w1, lt
 ; CHECK-NOLSE-O1-NEXT:    stxrb w10, w9, [x0]
 ; CHECK-NOLSE-O1-NEXT:    cbnz w10, LBB33_1
 ; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
@@ -2435,7 +2435,7 @@ define i8 @atomicrmw_min_i8(ptr %ptr, i8 %rhs) {
 ; CHECK-OUTLINE-O1-NEXT:    ldaxrb w8, [x0]
 ; CHECK-OUTLINE-O1-NEXT:    sxtb w9, w8
 ; CHECK-OUTLINE-O1-NEXT:    cmp w9, w1, sxtb
-; CHECK-OUTLINE-O1-NEXT:    csel w9, w8, w1, le
+; CHECK-OUTLINE-O1-NEXT:    csel w9, w8, w1, lt
 ; CHECK-OUTLINE-O1-NEXT:    stxrb w10, w9, [x0]
 ; CHECK-OUTLINE-O1-NEXT:    cbnz w10, LBB33_1
 ; CHECK-OUTLINE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
@@ -2662,7 +2662,7 @@ define i8 @atomicrmw_umin_i8(ptr %ptr, i8 %rhs) {
 ; CHECK-NOLSE-O1-NEXT:    ldaxrb w8, [x0]
 ; CHECK-NOLSE-O1-NEXT:    and w10, w8, #0xff
 ; CHECK-NOLSE-O1-NEXT:    cmp w10, w9
-; CHECK-NOLSE-O1-NEXT:    csel w10, w10, w9, ls
+; CHECK-NOLSE-O1-NEXT:    csel w10, w10, w9, lo
 ; CHECK-NOLSE-O1-NEXT:    stlxrb w11, w10, [x0]
 ; CHECK-NOLSE-O1-NEXT:    cbnz w11, LBB35_1
 ; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
@@ -2677,7 +2677,7 @@ define i8 @atomicrmw_umin_i8(ptr %ptr, i8 %rhs) {
 ; CHECK-OUTLINE-O1-NEXT:    ldaxrb w8, [x0]
 ; CHECK-OUTLINE-O1-NEXT:    and w10, w8, #0xff
 ; CHECK-OUTLINE-O1-NEXT:    cmp w10, w9
-; CHECK-OUTLINE-O1-NEXT:    csel w10, w10, w9, ls
+; CHECK-OUTLINE-O1-NEXT:    csel w10, w10, w9, lo
 ; CHECK-OUTLINE-O1-NEXT:    stlxrb w11, w10, [x0]
 ; CHECK-OUTLINE-O1-NEXT:    cbnz w11, LBB35_1
 ; CHECK-OUTLINE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
@@ -3477,7 +3477,7 @@ define i16 @atomicrmw_min_i16(ptr %ptr, i16 %rhs) {
 ; CHECK-NOLSE-O1-NEXT:    ldaxrh w8, [x0]
 ; CHECK-NOLSE-O1-NEXT:    sxth w9, w8
 ; CHECK-NOLSE-O1-NEXT:    cmp w9, w1, sxth
-; CHECK-NOLSE-O1-NEXT:    csel w9, w8, w1, le
+; CHECK-NOLSE-O1-NEXT:    csel w9, w8, w1, lt
 ; CHECK-NOLSE-O1-NEXT:    stxrh w10, w9, [x0]
 ; CHECK-NOLSE-O1-NEXT:    cbnz w10, LBB43_1
 ; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
@@ -3491,7 +3491,7 @@ define i16 @atomicrmw_min_i16(ptr %ptr, i16 %rhs) {
 ; CHECK-OUTLINE-O1-NEXT:    ldaxrh w8, [x0]
 ; CHECK-OUTLINE-O1-NEXT:    sxth w9, w8
 ; CHECK-OUTLINE-O1-NEXT:    cmp w9, w1, sxth
-; CHECK-OUTLINE-O1-NEXT:    csel w9, w8, w1, le
+; CHECK-OUTLINE-O1-NEXT:    csel w9, w8, w1, lt
 ; CHECK-OUTLINE-O1-NEXT:    stxrh w10, w9, [x0]
 ; CHECK-OUTLINE-O1-NEXT:    cbnz w10, LBB43_1
 ; CHECK-OUTLINE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
@@ -3718,7 +3718,7 @@ define i16 @atomicrmw_umin_i16(ptr %ptr, i16 %rhs) {
 ; CHECK-NOLSE-O1-NEXT:    ldaxrh w8, [x0]
 ; CHECK-NOLSE-O1-NEXT:    and w10, w8, #0xffff
 ; CHECK-NOLSE-O1-NEXT:    cmp w10, w9
-; CHECK-NOLSE-O1-NEXT:    csel w10, w10, w9, ls
+; CHECK-NOLSE-O1-NEXT:    csel w10, w10, w9, lo
 ; CHECK-NOLSE-O1-NEXT:    stlxrh w11, w10, [x0]
 ; CHECK-NOLSE-O1-NEXT:    cbnz w11, LBB45_1
 ; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
@@ -3733,7 +3733,7 @@ define i16 @atomicrmw_umin_i16(ptr %ptr, i16 %rhs) {
 ; CHECK-OUTLINE-O1-NEXT:    ldaxrh w8, [x0]
 ; CHECK-OUTLINE-O1-NEXT:    and w10, w8, #0xffff
 ; CHECK-OUTLINE-O1-NEXT:    cmp w10, w9
-; CHECK-OUTLINE-O1-NEXT:    csel w10, w10, w9, ls
+; CHECK-OUTLINE-O1-NEXT:    csel w10, w10, w9, lo
 ; CHECK-OUTLINE-O1-NEXT:    stlxrh w11, w10, [x0]
 ; CHECK-OUTLINE-O1-NEXT:    cbnz w11, LBB45_1
 ; CHECK-OUTLINE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
@@ -4526,7 +4526,7 @@ define i32 @atomicrmw_min_i32(ptr %ptr, i32 %rhs) {
 ; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; CHECK-NOLSE-O1-NEXT:    ldaxr w8, [x0]
 ; CHECK-NOLSE-O1-NEXT:    cmp w8, w1
-; CHECK-NOLSE-O1-NEXT:    csel w9, w8, w1, le
+; CHECK-NOLSE-O1-NEXT:    csel w9, w8, w1, lt
 ; CHECK-NOLSE-O1-NEXT:    stxr w10, w9, [x0]
 ; CHECK-NOLSE-O1-NEXT:    cbnz w10, LBB53_1
 ; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
@@ -4539,7 +4539,7 @@ define i32 @atomicrmw_min_i32(ptr %ptr, i32 %rhs) {
 ; CHECK-OUTLINE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; CHECK-OUTLINE-O1-NEXT:    ldaxr w8, [x0]
 ; CHECK-OUTLINE-O1-NEXT:    cmp w8, w1
-; CHECK-OUTLINE-O1-NEXT:    csel w9, w8, w1, le
+; CHECK-OUTLINE-O1-NEXT:    csel w9, w8, w1, lt
 ; CHECK-OUTLINE-O1-NEXT:    stxr w10, w9, [x0]
 ; CHECK-OUTLINE-O1-NEXT:    cbnz w10, LBB53_1
 ; CHECK-OUTLINE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
@@ -4754,7 +4754,7 @@ define i32 @atomicrmw_umin_i32(ptr %ptr, i32 %rhs) {
 ; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; CHECK-NOLSE-O1-NEXT:    ldaxr w8, [x0]
 ; CHECK-NOLSE-O1-NEXT:    cmp w8, w1
-; CHECK-NOLSE-O1-NEXT:    csel w9, w8, w1, ls
+; CHECK-NOLSE-O1-NEXT:    csel w9, w8, w1, lo
 ; CHECK-NOLSE-O1-NEXT:    stlxr w10, w9, [x0]
 ; CHECK-NOLSE-O1-NEXT:    cbnz w10, LBB55_1
 ; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
@@ -4767,7 +4767,7 @@ define i32 @atomicrmw_umin_i32(ptr %ptr, i32 %rhs) {
 ; CHECK-OUTLINE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; CHECK-OUTLINE-O1-NEXT:    ldaxr w8, [x0]
 ; CHECK-OUTLINE-O1-NEXT:    cmp w8, w1
-; CHECK-OUTLINE-O1-NEXT:    csel w9, w8, w1, ls
+; CHECK-OUTLINE-O1-NEXT:    csel w9, w8, w1, lo
 ; CHECK-OUTLINE-O1-NEXT:    stlxr w10, w9, [x0]
 ; CHECK-OUTLINE-O1-NEXT:    cbnz w10, LBB55_1
 ; CHECK-OUTLINE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
@@ -5547,7 +5547,7 @@ define i64 @atomicrmw_min_i64(ptr %ptr, i64 %rhs) {
 ; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; CHECK-NOLSE-O1-NEXT:    ldaxr x8, [x0]
 ; CHECK-NOLSE-O1-NEXT:    cmp x8, x1
-; CHECK-NOLSE-O1-NEXT:    csel x9, x8, x1, le
+; CHECK-NOLSE-O1-NEXT:    csel x9, x8, x1, lt
 ; CHECK-NOLSE-O1-NEXT:    stxr w10, x9, [x0]
 ; CHECK-NOLSE-O1-NEXT:    cbnz w10, LBB63_1
 ; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
@@ -5560,7 +5560,7 @@ define i64 @atomicrmw_min_i64(ptr %ptr, i64 %rhs) {
 ; CHECK-OUTLINE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; CHECK-OUTLINE-O1-NEXT:    ldaxr x8, [x0]
 ; CHECK-OUTLINE-O1-NEXT:    cmp x8, x1
-; CHECK-OUTLINE-O1-NEXT:    csel x9, x8, x1, le
+; CHECK-OUTLINE-O1-NEXT:    csel x9, x8, x1, lt
 ; CHECK-OUTLINE-O1-NEXT:    stxr w10, x9, [x0]
 ; CHECK-OUTLINE-O1-NEXT:    cbnz w10, LBB63_1
 ; CHECK-OUTLINE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
@@ -5775,7 +5775,7 @@ define i64 @atomicrmw_umin_i64(ptr %ptr, i64 %rhs) {
 ; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; CHECK-NOLSE-O1-NEXT:    ldaxr x8, [x0]
 ; CHECK-NOLSE-O1-NEXT:    cmp x8, x1
-; CHECK-NOLSE-O1-NEXT:    csel x9, x8, x1, ls
+; CHECK-NOLSE-O1-NEXT:    csel x9, x8, x1, lo
 ; CHECK-NOLSE-O1-NEXT:    stlxr w10, x9, [x0]
 ; CHECK-NOLSE-O1-NEXT:    cbnz w10, LBB65_1
 ; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
@@ -5788,7 +5788,7 @@ define i64 @atomicrmw_umin_i64(ptr %ptr, i64 %rhs) {
 ; CHECK-OUTLINE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; CHECK-OUTLINE-O1-NEXT:    ldaxr x8, [x0]
 ; CHECK-OUTLINE-O1-NEXT:    cmp x8, x1
-; CHECK-OUTLINE-O1-NEXT:    csel x9, x8, x1, ls
+; CHECK-OUTLINE-O1-NEXT:    csel x9, x8, x1, lo
 ; CHECK-OUTLINE-O1-NEXT:    stlxr w10, x9, [x0]
 ; CHECK-OUTLINE-O1-NEXT:    cbnz w10, LBB65_1
 ; CHECK-OUTLINE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll
index 4c07081404c889..5a7bd6ee20f9b4 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll
@@ -888,7 +888,7 @@ define i8 @atomicrmw_min_i8(ptr %ptr, i8 %rhs) {
   ; CHECK-NEXT:   renamable $w8 = LDAXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
   ; CHECK-NEXT:   renamable $w9 = SBFMWri renamable $w8, 0, 7, pcsections !0
   ; CHECK-NEXT:   dead $wzr = SUBSWrx killed renamable $w9, renamable $w1, 32, implicit-def $nzcv, pcsections !0
-  ; CHECK-NEXT:   renamable $w9 = CSELWr renamable $w8, renamable $w1, 13, implicit killed $nzcv, implicit-def $x9, pcsections !0
+  ; CHECK-NEXT:   renamable $w9 = CSELWr renamable $w8, renamable $w1, 11, implicit killed $nzcv, implicit-def $x9, pcsections !0
   ; CHECK-NEXT:   early-clobber renamable $w10 = STXRB renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s8) into %ir.ptr)
   ; CHECK-NEXT:   CBNZW killed renamable $w10, %bb.1, pcsections !0
   ; CHECK-NEXT: {{  $}}
@@ -943,7 +943,7 @@ define i8 @atomicrmw_umin_i8(ptr %ptr, i8 %rhs) {
   ; CHECK-NEXT:   renamable $w8 = LDAXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
   ; CHECK-NEXT:   renamable $w10 = ANDWri renamable $w8, 7
   ; CHECK-NEXT:   $wzr = SUBSWrs renamable $w10, renamable $w9, 0, implicit-def $nzcv, pcsections !0
-  ; CHECK-NEXT:   renamable $w10 = CSELWr killed renamable $w10, renamable $w9, 9, implicit killed $nzcv, implicit-def $x10, pcsections !0
+  ; CHECK-NEXT:   renamable $w10 = CSELWr killed renamable $w10, renamable $w9, 3, implicit killed $nzcv, implicit-def $x10, pcsections !0
   ; CHECK-NEXT:   early-clobber renamable $w11 = STLXRB renamable $w10, renamable $x0, implicit killed $x10, pcsections !0 :: (volatile store (s8) into %ir.ptr)
   ; CHECK-NEXT:   CBNZW killed renamable $w11, %bb.1, pcsections !0
   ; CHECK-NEXT: {{  $}}
@@ -1148,7 +1148,7 @@ define i16 @atomicrmw_min_i16(ptr %ptr, i16 %rhs) {
   ; CHECK-NEXT:   renamable $w8 = LDAXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
   ; CHECK-NEXT:   renamable $w9 = SBFMWri renamable $w8, 0, 15, pcsections !0
   ; CHECK-NEXT:   dead $wzr = SUBSWrx killed renamable $w9, renamable $w1, 40, implicit-def $nzcv, pcsections !0
-  ; CHECK-NEXT:   renamable $w9 = CSELWr renamable $w8, renamable $w1, 13, implicit killed $nzcv, implicit-def $x9, pcsections !0
+  ; CHECK-NEXT:   renamable $w9 = CSELWr renamable $w8, renamable $w1, 11, implicit killed $nzcv, implicit-def $x9, pcsections !0
   ; CHECK-NEXT:   early-clobber renamable $w10 = STXRH renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s16) into %ir.ptr)
   ; CHECK-NEXT:   CBNZW killed renamable $w10, %bb.1, pcsections !0
   ; CHECK-NEXT: {{  $}}
@@ -1203,7 +1203,7 @@ define i16 @atomicrmw_umin_i16(ptr %ptr, i16 %rhs) {
   ; CHECK-NEXT:   renamable $w8 = LDAXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
   ; CHECK-NEXT:   renamable $w10 = ANDWri renamable $w8, 15
   ; CHECK-NEXT:   $wzr = SUBSWrs renamable $w10, renamable $w9, 0, implicit-def $nzcv, pcsections !0
-  ; CHECK-NEXT:   renamable $w10 = CSELWr killed renamable $w10, renamable $w9, 9, implicit killed $nzcv, implicit-def $x10, pcsections !0
+  ; CHECK-NEXT:   renamable $w10 = CSELWr killed renamable $w10, renamable $w9, 3, implicit killed $nzcv, implicit-def $x10, pcsections !0
   ; CHECK-NEXT:   early-clobber renamable $w11 = STLXRH renamable $w10, renamable $x0, implicit killed $x10, pcsections !0 :: (volatile store (s16) into %ir.ptr)
   ; CHECK-NEXT:   CBNZW killed renamable $w11, %bb.1, pcsections !0
   ; CHECK-NEXT: {{  $}}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir
index be2de620fa456c..260cb72b0426a8 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir
@@ -544,3 +544,284 @@ body:             |
     %ext:_(s32) = G_ANYEXT %sel
     $w0 = COPY %ext(s32)
 ...
+---
+# negative test (select and icmp use different registers): select icmp_ugt t,y_f_z is not folded to umax
+name:            select_failed_icmp_ugt_t_f_t_f_umax_t_f
+body:             |
+  bb.1:
+    liveins: $x0, $x1, $x2
+    ; CHECK-LABEL: name: select_failed_icmp_ugt_t_f_t_f_umax_t_f
+    ; CHECK: liveins: $x0, $x1, $x2
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3
+    ; CHECK-NEXT: %t:_(s8) = G_TRUNC [[COPY]](s64)
+    ; CHECK-NEXT: %f:_(s8) = G_TRUNC [[COPY1]](s64)
+    ; CHECK-NEXT: %y:_(s8) = G_TRUNC [[COPY2]](s64)
+    ; CHECK-NEXT: %z:_(s8) = G_TRUNC [[COPY3]](s64)
+    ; CHECK-NEXT: %c:_(s8) = G_ICMP intpred(ugt), %t(s8), %y
+    ; CHECK-NEXT: %sel:_(s8) = exact G_SELECT %c(s8), %f, %z
+    ; CHECK-NEXT: %ext:_(s32) = G_ANYEXT %sel(s8)
+    ; CHECK-NEXT: $w0 = COPY %ext(s32)
+    %0:_(s64) = COPY $x0
+    %1:_(s64) = COPY $x1
+    %2:_(s64) = COPY $x2
+    %3:_(s64) = COPY $x3
+    %4:_(s64) = COPY $x4
+    %t:_(s8) = G_TRUNC %0
+    %f:_(s8) = G_TRUNC %1
+    %y:_(s8) = G_TRUNC %2
+    %z:_(s8) = G_TRUNC %3
+    %c:_(s8) = G_ICMP intpred(ugt), %t(s8), %y(s8)
+    %sel:_(s8) = exact G_SELECT %c, %f, %z
+    %ext:_(s32) = G_ANYEXT %sel
+    $w0 = COPY %ext(s32)
+...
+---
+# test select icmp_ugt t,f_t_f --> umax(t,f)
+name:            select_icmp_ugt_t_f_t_f_umax_t_f
+body:             |
+  bb.1:
+    liveins: $x0, $x1, $x2
+    ; CHECK-LABEL: name: select_icmp_ugt_t_f_t_f_umax_t_f
+    ; CHECK: liveins: $x0, $x1, $x2
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+    ; CHECK-NEXT: %t1:_(s32) = G_TRUNC [[COPY]](s64)
+    ; CHECK-NEXT: %f1:_(s32) = G_TRUNC [[COPY1]](s64)
+    ; CHECK-NEXT: %t:_(<4 x s32>) = G_BUILD_VECTOR %t1(s32), %t1(s32), %t1(s32), %t1(s32)
+    ; CHECK-NEXT: %f:_(<4 x s32>) = G_BUILD_VECTOR %f1(s32), %f1(s32), %f1(s32), %f1(s32)
+    ; CHECK-NEXT: %sel:_(<4 x s32>) = G_UMAX %t, %f
+    ; CHECK-NEXT: $q0 = COPY %sel(<4 x s32>)
+    %0:_(s64) = COPY $x0
+    %1:_(s64) = COPY $x1
+    %t1:_(s32) = G_TRUNC %0
+    %f1:_(s32) = G_TRUNC %1
+    %t:_(<4 x s32>) = G_BUILD_VECTOR %t1, %t1, %t1, %t1
+    %f:_(<4 x s32>) = G_BUILD_VECTOR %f1, %f1, %f1, %f1
+    %c:_(<4 x s32>) = G_ICMP intpred(ugt), %t(<4 x s32>), %f(<4 x s32>)
+    %sel:_(<4 x s32>) = exact G_SELECT %c, %t, %f
+    $q0 = COPY %sel(<4 x s32>)
+...
+---
+# test select icmp_uge t,f_t_f --> umax(t,f)
+name:            select_icmp_uge_t_f_t_f_umax_t_f
+body:             |
+  bb.1:
+    liveins: $x0, $x1, $x2
+    ; CHECK-LABEL: name: select_icmp_uge_t_f_t_f_umax_t_f
+    ; CHECK: liveins: $x0, $x1, $x2
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+    ; CHECK-NEXT: %t1:_(s32) = G_TRUNC [[COPY]](s64)
+    ; CHECK-NEXT: %f1:_(s32) = G_TRUNC [[COPY1]](s64)
+    ; CHECK-NEXT: %t:_(<4 x s32>) = G_BUILD_VECTOR %t1(s32), %t1(s32), %t1(s32), %t1(s32)
+    ; CHECK-NEXT: %f:_(<4 x s32>) = G_BUILD_VECTOR %f1(s32), %f1(s32), %f1(s32), %f1(s32)
+    ; CHECK-NEXT: %sel:_(<4 x s32>) = G_UMAX %t, %f
+    ; CHECK-NEXT: $q0 = COPY %sel(<4 x s32>)
+    %0:_(s64) = COPY $x0
+    %1:_(s64) = COPY $x1
+    %t1:_(s32) = G_TRUNC %0
+    %f1:_(s32) = G_TRUNC %1
+    %t:_(<4 x s32>) = G_BUILD_VECTOR %t1, %t1, %t1, %t1
+    %f:_(<4 x s32>) = G_BUILD_VECTOR %f1, %f1, %f1, %f1
+    %c:_(<4 x s32>) = G_ICMP intpred(uge), %t(<4 x s32>), %f(<4 x s32>)
+    %sel:_(<4 x s32>) = exact G_SELECT %c, %t, %f
+    $q0 = COPY %sel(<4 x s32>)
+...
+---
+# test select icmp_sgt t,f_t_f --> smax(t,f)
+name:            select_icmp_sgt_t_f_t_f_smax_t_f
+body:             |
+  bb.1:
+    liveins: $x0, $x1, $x2
+    ; CHECK-LABEL: name: select_icmp_sgt_t_f_t_f_smax_t_f
+    ; CHECK: liveins: $x0, $x1, $x2
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+    ; CHECK-NEXT: %t1:_(s32) = G_TRUNC [[COPY]](s64)
+    ; CHECK-NEXT: %f1:_(s32) = G_TRUNC [[COPY1]](s64)
+    ; CHECK-NEXT: %t:_(<4 x s32>) = G_BUILD_VECTOR %t1(s32), %t1(s32), %t1(s32), %t1(s32)
+    ; CHECK-NEXT: %f:_(<4 x s32>) = G_BUILD_VECTOR %f1(s32), %f1(s32), %f1(s32), %f1(s32)
+    ; CHECK-NEXT: %sel:_(<4 x s32>) = G_SMAX %t, %f
+    ; CHECK-NEXT: $q0 = COPY %sel(<4 x s32>)
+    %0:_(s64) = COPY $x0
+    %1:_(s64) = COPY $x1
+    %t1:_(s32) = G_TRUNC %0
+    %f1:_(s32) = G_TRUNC %1
+    %t:_(<4 x s32>) = G_BUILD_VECTOR %t1, %t1, %t1, %t1
+    %f:_(<4 x s32>) = G_BUILD_VECTOR %f1, %f1, %f1, %f1
+    %c:_(<4 x s32>) = G_ICMP intpred(sgt), %t(<4 x s32>), %f(<4 x s32>)
+    %sel:_(<4 x s32>) = exact G_SELECT %c, %t, %f
+    $q0 = COPY %sel(<4 x s32>)
+...
+---
+# test select icmp_sge t,f_t_f --> smax(t,f)
+name:            select_icmp_sge_t_f_t_f_smax_t_f
+body:             |
+  bb.1:
+    liveins: $x0, $x1, $x2
+    ; CHECK-LABEL: name: select_icmp_sge_t_f_t_f_smax_t_f
+    ; CHECK: liveins: $x0, $x1, $x2
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+    ; CHECK-NEXT: %t1:_(s32) = G_TRUNC [[COPY]](s64)
+    ; CHECK-NEXT: %f1:_(s32) = G_TRUNC [[COPY1]](s64)
+    ; CHECK-NEXT: %t:_(<4 x s32>) = G_BUILD_VECTOR %t1(s32), %t1(s32), %t1(s32), %t1(s32)
+    ; CHECK-NEXT: %f:_(<4 x s32>) = G_BUILD_VECTOR %f1(s32), %f1(s32), %f1(s32), %f1(s32)
+    ; CHECK-NEXT: %sel:_(<4 x s32>) = G_SMAX %t, %f
+    ; CHECK-NEXT: $q0 = COPY %sel(<4 x s32>)
+    %0:_(s64) = COPY $x0
+    %1:_(s64) = COPY $x1
+    %t1:_(s32) = G_TRUNC %0
+    %f1:_(s32) = G_TRUNC %1
+    %t:_(<4 x s32>) = G_BUILD_VECTOR %t1, %t1, %t1, %t1
+    %f:_(<4 x s32>) = G_BUILD_VECTOR %f1, %f1, %f1, %f1
+    %c:_(<4 x s32>) = G_ICMP intpred(sge), %t(<4 x s32>), %f(<4 x s32>)
+    %sel:_(<4 x s32>) = exact G_SELECT %c, %t, %f
+    $q0 = COPY %sel(<4 x s32>)
+...
+---
+# test select icmp_ult t,f_t_f --> umin(t,f)
+name:            select_icmp_ult_t_f_t_f_umin_t_f
+body:             |
+  bb.1:
+    liveins: $x0, $x1, $x2
+    ; CHECK-LABEL: name: select_icmp_ult_t_f_t_f_umin_t_f
+    ; CHECK: liveins: $x0, $x1, $x2
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+    ; CHECK-NEXT: %t1:_(s32) = G_TRUNC [[COPY]](s64)
+    ; CHECK-NEXT: %f1:_(s32) = G_TRUNC [[COPY1]](s64)
+    ; CHECK-NEXT: %t:_(<4 x s32>) = G_BUILD_VECTOR %t1(s32), %t1(s32), %t1(s32), %t1(s32)
+    ; CHECK-NEXT: %f:_(<4 x s32>) = G_BUILD_VECTOR %f1(s32), %f1(s32), %f1(s32), %f1(s32)
+    ; CHECK-NEXT: %sel:_(<4 x s32>) = G_UMIN %t, %f
+    ; CHECK-NEXT: $q0 = COPY %sel(<4 x s32>)
+    %0:_(s64) = COPY $x0
+    %1:_(s64) = COPY $x1
+    %t1:_(s32) = G_TRUNC %0
+    %f1:_(s32) = G_TRUNC %1
+    %t:_(<4 x s32>) = G_BUILD_VECTOR %t1, %t1, %t1, %t1
+    %f:_(<4 x s32>) = G_BUILD_VECTOR %f1, %f1, %f1, %f1
+    %c:_(<4 x s32>) = G_ICMP intpred(ult), %t(<4 x s32>), %f(<4 x s32>)
+    %sel:_(<4 x s32>) = exact G_SELECT %c, %t, %f
+    $q0 = COPY %sel(<4 x s32>)
+...
+---
+# test select icmp_ule t,f_t_f --> umin(t,f)
+name:            select_icmp_ule_t_f_t_f_umin_t_f
+body:             |
+  bb.1:
+    liveins: $x0, $x1, $x2
+    ; CHECK-LABEL: name: select_icmp_ule_t_f_t_f_umin_t_f
+    ; CHECK: liveins: $x0, $x1, $x2
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+    ; CHECK-NEXT: %t1:_(s32) = G_TRUNC [[COPY]](s64)
+    ; CHECK-NEXT: %f1:_(s32) = G_TRUNC [[COPY1]](s64)
+    ; CHECK-NEXT: %t:_(<4 x s32>) = G_BUILD_VECTOR %t1(s32), %t1(s32), %t1(s32), %t1(s32)
+    ; CHECK-NEXT: %f:_(<4 x s32>) = G_BUILD_VECTOR %f1(s32), %f1(s32), %f1(s32), %f1(s32)
+    ; CHECK-NEXT: %sel:_(<4 x s32>) = G_UMIN %t, %f
+    ; CHECK-NEXT: $q0 = COPY %sel(<4 x s32>)
+    %0:_(s64) = COPY $x0
+    %1:_(s64) = COPY $x1
+    %t1:_(s32) = G_TRUNC %0
+    %f1:_(s32) = G_TRUNC %1
+    %t:_(<4 x s32>) = G_BUILD_VECTOR %t1, %t1, %t1, %t1
+    %f:_(<4 x s32>) = G_BUILD_VECTOR %f1, %f1, %f1, %f1
+    %c:_(<4 x s32>) = G_ICMP intpred(ule), %t(<4 x s32>), %f(<4 x s32>)
+    %sel:_(<4 x s32>) = exact G_SELECT %c, %t, %f
+    $q0 = COPY %sel(<4 x s32>)
+...
+---
+# test select icmp_slt t,f_t_f --> smin(t,f)
+name:            select_icmp_slt_t_f_t_f_smin_t_f
+body:             |
+  bb.1:
+    liveins: $x0, $x1, $x2
+    ; CHECK-LABEL: name: select_icmp_slt_t_f_t_f_smin_t_f
+    ; CHECK: liveins: $x0, $x1, $x2
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+    ; CHECK-NEXT: %t1:_(s32) = G_TRUNC [[COPY]](s64)
+    ; CHECK-NEXT: %f1:_(s32) = G_TRUNC [[COPY1]](s64)
+    ; CHECK-NEXT: %t:_(<4 x s32>) = G_BUILD_VECTOR %t1(s32), %t1(s32), %t1(s32), %t1(s32)
+    ; CHECK-NEXT: %f:_(<4 x s32>) = G_BUILD_VECTOR %f1(s32), %f1(s32), %f1(s32), %f1(s32)
+    ; CHECK-NEXT: %sel:_(<4 x s32>) = G_SMIN %t, %f
+    ; CHECK-NEXT: $q0 = COPY %sel(<4 x s32>)
+    %0:_(s64) = COPY $x0
+    %1:_(s64) = COPY $x1
+    %t1:_(s32) = G_TRUNC %0
+    %f1:_(s32) = G_TRUNC %1
+    %t:_(<4 x s32>) = G_BUILD_VECTOR %t1, %t1, %t1, %t1
+    %f:_(<4 x s32>) = G_BUILD_VECTOR %f1, %f1, %f1, %f1
+    %c:_(<4 x s32>) = G_ICMP intpred(slt), %t(<4 x s32>), %f(<4 x s32>)
+    %sel:_(<4 x s32>) = exact G_SELECT %c, %t, %f
+    $q0 = COPY %sel(<4 x s32>)
+...
+---
+# test select icmp_sle t,f_t_f --> smin(t,f)
+name:            select_icmp_sle_t_f_t_f_smin_t_f
+body:             |
+  bb.1:
+    liveins: $x0, $x1, $x2
+    ; CHECK-LABEL: name: select_icmp_sle_t_f_t_f_smin_t_f
+    ; CHECK: liveins: $x0, $x1, $x2
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+    ; CHECK-NEXT: %t1:_(s32) = G_TRUNC [[COPY]](s64)
+    ; CHECK-NEXT: %f1:_(s32) = G_TRUNC [[COPY1]](s64)
+    ; CHECK-NEXT: %t:_(<4 x s32>) = G_BUILD_VECTOR %t1(s32), %t1(s32), %t1(s32), %t1(s32)
+    ; CHECK-NEXT: %f:_(<4 x s32>) = G_BUILD_VECTOR %f1(s32), %f1(s32), %f1(s32), %f1(s32)
+    ; CHECK-NEXT: %sel:_(<4 x s32>) = G_SMIN %t, %f
+    ; CHECK-NEXT: $q0 = COPY %sel(<4 x s32>)
+    %0:_(s64) = COPY $x0
+    %1:_(s64) = COPY $x1
+    %t1:_(s32) = G_TRUNC %0
+    %f1:_(s32) = G_TRUNC %1
+    %t:_(<4 x s32>) = G_BUILD_VECTOR %t1, %t1, %t1, %t1
+    %f:_(<4 x s32>) = G_BUILD_VECTOR %f1, %f1, %f1, %f1
+    %c:_(<4 x s32>) = G_ICMP intpred(sle), %t(<4 x s32>), %f(<4 x s32>)
+    %sel:_(<4 x s32>) = exact G_SELECT %c, %t, %f
+    $q0 = COPY %sel(<4 x s32>)
+...
+---
+# multi use test select icmp_sle t,f_t_f --> smin(t,f) failed
+name:            multi_use_select_icmp_sle_t_f_t_f_smin_t_f_failed
+body:             |
+  bb.1:
+    liveins: $x0, $x1, $x2
+    ; CHECK-LABEL: name: multi_use_select_icmp_sle_t_f_t_f_smin_t_f_failed
+    ; CHECK: liveins: $x0, $x1, $x2
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+    ; CHECK-NEXT: %t1:_(s32) = G_TRUNC [[COPY]](s64)
+    ; CHECK-NEXT: %f1:_(s32) = G_TRUNC [[COPY1]](s64)
+    ; CHECK-NEXT: %t:_(<4 x s32>) = G_BUILD_VECTOR %t1(s32), %t1(s32), %t1(s32), %t1(s32)
+    ; CHECK-NEXT: %f:_(<4 x s32>) = G_BUILD_VECTOR %f1(s32), %f1(s32), %f1(s32), %f1(s32)
+    ; CHECK-NEXT: %c:_(<4 x s32>) = G_ICMP intpred(sle), %t(<4 x s32>), %f
+    ; CHECK-NEXT: $q1 = COPY %c(<4 x s32>)
+    ; CHECK-NEXT: %sel:_(<4 x s32>) = exact G_SELECT %c(<4 x s32>), %t, %f
+    ; CHECK-NEXT: $q0 = COPY %sel(<4 x s32>)
+    %0:_(s64) = COPY $x0
+    %1:_(s64) = COPY $x1
+    %t1:_(s32) = G_TRUNC %0
+    %f1:_(s32) = G_TRUNC %1
+    %t:_(<4 x s32>) = G_BUILD_VECTOR %t1, %t1, %t1, %t1
+    %f:_(<4 x s32>) = G_BUILD_VECTOR %f1, %f1, %f1, %f1
+    %c:_(<4 x s32>) = G_ICMP intpred(sle), %t(<4 x s32>), %f(<4 x s32>)
+    $q1 = COPY %c(<4 x s32>)
+    %sel:_(<4 x s32>) = exact G_SELECT %c, %t, %f
+    $q0 = COPY %sel(<4 x s32>)
+...



More information about the llvm-commits mailing list