[llvm] [GISel] Support narrowing G_ICMP with more than 2 parts. (PR #119335)

via llvm-commits llvm-commits at lists.llvm.org
Mon Dec 9 23:04:49 PST 2024


llvmbot wrote:



@llvm/pr-subscribers-llvm-globalisel

Author: Craig Topper (topperc)

<details>
<summary>Changes</summary>

This allows us to support i128 G_ICMP on RV32. I'm not sure how to test the "leftover" part of this, as RISC-V always widens to a power of 2 before narrowing.
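
For reference, a minimal standalone C++ sketch of the lowering this patch generalizes (the `sltParts` helper below is hypothetical, not LLVM API): each part is compared from least to most significant, all lower parts use the unsigned form of the predicate, the original predicate is used only on the most significant part, and a select keyed on per-part equality forwards the verdict from the lower parts.

```cpp
#include <array>
#include <cassert>
#include <cstdint>

// Signed less-than (ICMP_SLT) over an i128 split into four i32 parts,
// least-significant part first -- the same part order extractParts produces.
static bool sltParts(const std::array<uint32_t, 4> &LHS,
                     const std::array<uint32_t, 4> &RHS) {
  bool CmpIn = false; // Verdict carried in from the less-significant parts.
  for (unsigned I = 0, E = 4; I != E; ++I) {
    // Only the most-significant part uses the original (signed) predicate;
    // every lower part is compared with the unsigned form.
    bool Cmp = (I == E - 1) ? (static_cast<int32_t>(LHS[I]) <
                               static_cast<int32_t>(RHS[I]))
                            : (LHS[I] < RHS[I]);
    bool CmpEq = LHS[I] == RHS[I];
    // First part: no incoming verdict. Later parts: if this part is equal,
    // keep the verdict from the lower parts; otherwise this part decides.
    CmpIn = (I == 0) ? Cmp : (CmpEq ? CmpIn : Cmp);
  }
  return CmpIn;
}

int main() {
  // -1 as i128 (all parts 0xffffffff) is signed-less-than 1.
  std::array<uint32_t, 4> NegOne = {0xffffffffu, 0xffffffffu, 0xffffffffu,
                                    0xffffffffu};
  std::array<uint32_t, 4> One = {1u, 0u, 0u, 0u};
  assert(sltParts(NegOne, One));
  assert(!sltParts(One, NegOne));
  return 0;
}
```

The equality predicates keep the existing XOR/OR lowering; the chained select form above only applies to the ordered predicates, and it no longer requires exactly two parts.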

---

Patch is 248.54 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/119335.diff


29 Files Affected:

- (modified) llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp (+54-18) 
- (modified) llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2.ll (+40-40) 
- (modified) llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2_lse128.ll (+40-40) 
- (modified) llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-outline_atomics.ll (+40-40) 
- (modified) llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc.ll (+40-40) 
- (modified) llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc3.ll (+40-40) 
- (modified) llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8_1a.ll (+40-40) 
- (modified) llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8a.ll (+40-40) 
- (modified) llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmp.mir (+4-4) 
- (modified) llvm/test/CodeGen/AArch64/GlobalISel/legalize-threeway-cmp.mir (+8-8) 
- (modified) llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll (+12-16) 
- (modified) llvm/test/CodeGen/AArch64/icmp.ll (+15-17) 
- (modified) llvm/test/CodeGen/AArch64/scmp.ll (+6-8) 
- (modified) llvm/test/CodeGen/AArch64/ucmp.ll (+12-16) 
- (modified) llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll (+3-4) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll (+96-96) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll (+105-105) 
- (modified) llvm/test/CodeGen/AMDGPU/div_i128.ll (+45-45) 
- (modified) llvm/test/CodeGen/Mips/GlobalISel/legalizer/icmp.mir (+32-32) 
- (modified) llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/icmp.ll (+12-12) 
- (modified) llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-addo-subo-rv32.mir (+24-24) 
- (modified) llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-icmp-rv32.mir (+114-64) 
- (modified) llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-icmp-rv64.mir (+96-96) 
- (modified) llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-sat-rv32.mir (+24-24) 
- (modified) llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-smax-rv32.mir (+4-4) 
- (modified) llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-smin-rv32.mir (+4-4) 
- (modified) llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-umax-rv32.mir (+4-4) 
- (modified) llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-umin-rv32.mir (+4-4) 
- (modified) llvm/test/CodeGen/X86/isel-select-cmov.ll (+12-12) 


``````````diff
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 5bfeee05a19c0d..a6997b7a27d450 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -1717,14 +1717,9 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
   case TargetOpcode::G_ICMP: {
     Register LHS = MI.getOperand(2).getReg();
     LLT SrcTy = MRI.getType(LHS);
-    uint64_t SrcSize = SrcTy.getSizeInBits();
     CmpInst::Predicate Pred =
         static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
 
-    // TODO: Handle the non-equality case for weird sizes.
-    if (NarrowSize * 2 != SrcSize && !ICmpInst::isEquality(Pred))
-      return UnableToLegalize;
-
     LLT LeftoverTy; // Example: s88 -> s64 (NarrowTy) + s24 (leftover)
     SmallVector<Register, 4> LHSPartRegs, LHSLeftoverRegs;
     if (!extractParts(LHS, SrcTy, NarrowTy, LeftoverTy, LHSPartRegs,
@@ -1776,19 +1771,60 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
         Or = MIRBuilder.buildOr(NarrowTy, Or, Xors[I]);
       MIRBuilder.buildICmp(Pred, Dst, Or, Zero);
     } else {
-      // TODO: Handle non-power-of-two types.
-      assert(LHSPartRegs.size() == 2 && "Expected exactly 2 LHS part regs?");
-      assert(RHSPartRegs.size() == 2 && "Expected exactly 2 RHS part regs?");
-      Register LHSL = LHSPartRegs[0];
-      Register LHSH = LHSPartRegs[1];
-      Register RHSL = RHSPartRegs[0];
-      Register RHSH = RHSPartRegs[1];
-      MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, ResTy, LHSH, RHSH);
-      MachineInstrBuilder CmpHEQ =
-          MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy, LHSH, RHSH);
-      MachineInstrBuilder CmpLU = MIRBuilder.buildICmp(
-          ICmpInst::getUnsignedPredicate(Pred), ResTy, LHSL, RHSL);
-      MIRBuilder.buildSelect(Dst, CmpHEQ, CmpLU, CmpH);
+      Register CmpIn;
+      for (unsigned I = 0, E = LHSPartRegs.size(); I != E; ++I) {
+        Register CmpOut;
+        CmpInst::Predicate PartPred;
+
+        if (I == E - 1 && LHSLeftoverRegs.empty()) {
+          PartPred = Pred;
+          CmpOut = Dst;
+        } else {
+          PartPred = ICmpInst::getUnsignedPredicate(Pred);
+          CmpOut = MRI.createGenericVirtualRegister(ResTy);
+        }
+
+        if (!CmpIn) {
+          MIRBuilder.buildICmp(PartPred, CmpOut,
+                               LHSPartRegs[I], RHSPartRegs[I]);
+        } else {
+          auto Cmp =
+              MIRBuilder.buildICmp(PartPred, ResTy,
+                                   LHSPartRegs[I], RHSPartRegs[I]);
+          auto CmpEq = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy,
+                                            LHSPartRegs[I], RHSPartRegs[I]);
+          MIRBuilder.buildSelect(CmpOut, CmpEq, CmpIn, Cmp);
+        }
+
+        CmpIn = CmpOut;
+      }
+
+      for (unsigned I = 0, E = LHSLeftoverRegs.size(); I != E; ++I) {
+        Register CmpOut;
+        CmpInst::Predicate PartPred;
+
+        if (I == E - 1) {
+          PartPred = Pred;
+          CmpOut = Dst;
+        } else {
+          PartPred = ICmpInst::getUnsignedPredicate(Pred);
+          CmpOut = MRI.createGenericVirtualRegister(ResTy);
+        }
+
+        if (!CmpIn) {
+          MIRBuilder.buildICmp(PartPred, CmpOut,
+                               LHSLeftoverRegs[I], RHSLeftoverRegs[I]);
+        } else {
+          auto Cmp =
+              MIRBuilder.buildICmp(PartPred, ResTy,
+                                   LHSLeftoverRegs[I], RHSLeftoverRegs[I]);
+          auto CmpEq = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy,
+                                            LHSLeftoverRegs[I], RHSLeftoverRegs[I]);
+          MIRBuilder.buildSelect(CmpOut, CmpEq, CmpIn, Cmp);
+        }
+
+        CmpIn = CmpOut;
+      }
     }
     MI.eraseFromParent();
     return Legalized;
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2.ll
index 88061756d8feeb..d93ef6f8b2869b 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2.ll
@@ -5755,8 +5755,8 @@ define dso_local i64 @atomicrmw_max_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 
 define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_monotonic:
-; -O0:    subs x8, x8, x9
 ; -O0:    subs x8, x8, x12
+; -O0:    subs x10, x10, x9
 ; -O0:    subs x13, x13, x9
 ; -O0:    csel w10, w8, w10, eq
 ; -O0:    ands w13, w10, #0x1
@@ -5785,8 +5785,8 @@ define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_acquire:
-; -O0:    subs x8, x8, x9
 ; -O0:    subs x8, x8, x12
+; -O0:    subs x10, x10, x9
 ; -O0:    subs x13, x13, x9
 ; -O0:    csel w10, w8, w10, eq
 ; -O0:    ands w13, w10, #0x1
@@ -5815,8 +5815,8 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_release:
-; -O0:    subs x8, x8, x9
 ; -O0:    subs x8, x8, x12
+; -O0:    subs x10, x10, x9
 ; -O0:    subs x13, x13, x9
 ; -O0:    csel w10, w8, w10, eq
 ; -O0:    ands w13, w10, #0x1
@@ -5845,8 +5845,8 @@ define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_acq_rel:
-; -O0:    subs x8, x8, x9
 ; -O0:    subs x8, x8, x12
+; -O0:    subs x10, x10, x9
 ; -O0:    subs x13, x13, x9
 ; -O0:    csel w10, w8, w10, eq
 ; -O0:    ands w13, w10, #0x1
@@ -5875,8 +5875,8 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_max_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_aligned_seq_cst:
-; -O0:    subs x8, x8, x9
 ; -O0:    subs x8, x8, x12
+; -O0:    subs x10, x10, x9
 ; -O0:    subs x13, x13, x9
 ; -O0:    csel w10, w8, w10, eq
 ; -O0:    ands w13, w10, #0x1
@@ -6230,8 +6230,8 @@ define dso_local i64 @atomicrmw_max_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 
 define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_monotonic:
-; -O0:    subs x8, x8, x10
 ; -O0:    subs x8, x8, x9
+; -O0:    subs x11, x11, x10
 ; -O0:    subs x12, x12, x10
 ; -O0:    csel w11, w8, w11, eq
 ; -O0:    ands w12, w11, #0x1
@@ -6252,8 +6252,8 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_monotonic(ptr %ptr, i128 %va
 
 define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_acquire:
-; -O0:    subs x8, x8, x10
 ; -O0:    subs x8, x8, x9
+; -O0:    subs x11, x11, x10
 ; -O0:    subs x12, x12, x10
 ; -O0:    csel w11, w8, w11, eq
 ; -O0:    ands w12, w11, #0x1
@@ -6274,8 +6274,8 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acquire(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_release:
-; -O0:    subs x8, x8, x10
 ; -O0:    subs x8, x8, x9
+; -O0:    subs x11, x11, x10
 ; -O0:    subs x12, x12, x10
 ; -O0:    csel w11, w8, w11, eq
 ; -O0:    ands w12, w11, #0x1
@@ -6296,8 +6296,8 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_release(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_acq_rel:
-; -O0:    subs x8, x8, x10
 ; -O0:    subs x8, x8, x9
+; -O0:    subs x11, x11, x10
 ; -O0:    subs x12, x12, x10
 ; -O0:    csel w11, w8, w11, eq
 ; -O0:    ands w12, w11, #0x1
@@ -6318,8 +6318,8 @@ define dso_local i128 @atomicrmw_max_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_max_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_max_i128_unaligned_seq_cst:
-; -O0:    subs x8, x8, x10
 ; -O0:    subs x8, x8, x9
+; -O0:    subs x11, x11, x10
 ; -O0:    subs x12, x12, x10
 ; -O0:    csel w11, w8, w11, eq
 ; -O0:    ands w12, w11, #0x1
@@ -6725,8 +6725,8 @@ define dso_local i64 @atomicrmw_min_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 
 define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_monotonic:
-; -O0:    subs x8, x8, x9
 ; -O0:    subs x8, x8, x12
+; -O0:    subs x10, x10, x9
 ; -O0:    subs x13, x13, x9
 ; -O0:    csel w10, w8, w10, eq
 ; -O0:    ands w13, w10, #0x1
@@ -6755,8 +6755,8 @@ define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_acquire:
-; -O0:    subs x8, x8, x9
 ; -O0:    subs x8, x8, x12
+; -O0:    subs x10, x10, x9
 ; -O0:    subs x13, x13, x9
 ; -O0:    csel w10, w8, w10, eq
 ; -O0:    ands w13, w10, #0x1
@@ -6785,8 +6785,8 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_release:
-; -O0:    subs x8, x8, x9
 ; -O0:    subs x8, x8, x12
+; -O0:    subs x10, x10, x9
 ; -O0:    subs x13, x13, x9
 ; -O0:    csel w10, w8, w10, eq
 ; -O0:    ands w13, w10, #0x1
@@ -6815,8 +6815,8 @@ define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_acq_rel:
-; -O0:    subs x8, x8, x9
 ; -O0:    subs x8, x8, x12
+; -O0:    subs x10, x10, x9
 ; -O0:    subs x13, x13, x9
 ; -O0:    csel w10, w8, w10, eq
 ; -O0:    ands w13, w10, #0x1
@@ -6845,8 +6845,8 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_min_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_aligned_seq_cst:
-; -O0:    subs x8, x8, x9
 ; -O0:    subs x8, x8, x12
+; -O0:    subs x10, x10, x9
 ; -O0:    subs x13, x13, x9
 ; -O0:    csel w10, w8, w10, eq
 ; -O0:    ands w13, w10, #0x1
@@ -7200,8 +7200,8 @@ define dso_local i64 @atomicrmw_min_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 
 define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_monotonic:
-; -O0:    subs x8, x8, x10
 ; -O0:    subs x8, x8, x9
+; -O0:    subs x11, x11, x10
 ; -O0:    subs x12, x12, x10
 ; -O0:    csel w11, w8, w11, eq
 ; -O0:    ands w12, w11, #0x1
@@ -7222,8 +7222,8 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_monotonic(ptr %ptr, i128 %va
 
 define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_acquire:
-; -O0:    subs x8, x8, x10
 ; -O0:    subs x8, x8, x9
+; -O0:    subs x11, x11, x10
 ; -O0:    subs x12, x12, x10
 ; -O0:    csel w11, w8, w11, eq
 ; -O0:    ands w12, w11, #0x1
@@ -7244,8 +7244,8 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acquire(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_release:
-; -O0:    subs x8, x8, x10
 ; -O0:    subs x8, x8, x9
+; -O0:    subs x11, x11, x10
 ; -O0:    subs x12, x12, x10
 ; -O0:    csel w11, w8, w11, eq
 ; -O0:    ands w12, w11, #0x1
@@ -7266,8 +7266,8 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_release(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_acq_rel:
-; -O0:    subs x8, x8, x10
 ; -O0:    subs x8, x8, x9
+; -O0:    subs x11, x11, x10
 ; -O0:    subs x12, x12, x10
 ; -O0:    csel w11, w8, w11, eq
 ; -O0:    ands w12, w11, #0x1
@@ -7288,8 +7288,8 @@ define dso_local i128 @atomicrmw_min_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_min_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_min_i128_unaligned_seq_cst:
-; -O0:    subs x8, x8, x10
 ; -O0:    subs x8, x8, x9
+; -O0:    subs x11, x11, x10
 ; -O0:    subs x12, x12, x10
 ; -O0:    csel w11, w8, w11, eq
 ; -O0:    ands w12, w11, #0x1
@@ -7690,8 +7690,8 @@ define dso_local i64 @atomicrmw_umax_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 
 define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_monotonic:
-; -O0:    subs x8, x8, x9
 ; -O0:    subs x8, x8, x12
+; -O0:    subs x10, x10, x9
 ; -O0:    subs x13, x13, x9
 ; -O0:    csel w10, w8, w10, eq
 ; -O0:    ands w13, w10, #0x1
@@ -7720,8 +7720,8 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_acquire:
-; -O0:    subs x8, x8, x9
 ; -O0:    subs x8, x8, x12
+; -O0:    subs x10, x10, x9
 ; -O0:    subs x13, x13, x9
 ; -O0:    csel w10, w8, w10, eq
 ; -O0:    ands w13, w10, #0x1
@@ -7750,8 +7750,8 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_release:
-; -O0:    subs x8, x8, x9
 ; -O0:    subs x8, x8, x12
+; -O0:    subs x10, x10, x9
 ; -O0:    subs x13, x13, x9
 ; -O0:    csel w10, w8, w10, eq
 ; -O0:    ands w13, w10, #0x1
@@ -7780,8 +7780,8 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_acq_rel:
-; -O0:    subs x8, x8, x9
 ; -O0:    subs x8, x8, x12
+; -O0:    subs x10, x10, x9
 ; -O0:    subs x13, x13, x9
 ; -O0:    csel w10, w8, w10, eq
 ; -O0:    ands w13, w10, #0x1
@@ -7810,8 +7810,8 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umax_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_aligned_seq_cst:
-; -O0:    subs x8, x8, x9
 ; -O0:    subs x8, x8, x12
+; -O0:    subs x10, x10, x9
 ; -O0:    subs x13, x13, x9
 ; -O0:    csel w10, w8, w10, eq
 ; -O0:    ands w13, w10, #0x1
@@ -8160,8 +8160,8 @@ define dso_local i64 @atomicrmw_umax_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 
 define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_monotonic:
-; -O0:    subs x8, x8, x10
 ; -O0:    subs x8, x8, x9
+; -O0:    subs x11, x11, x10
 ; -O0:    subs x12, x12, x10
 ; -O0:    csel w11, w8, w11, eq
 ; -O0:    ands w12, w11, #0x1
@@ -8182,8 +8182,8 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_monotonic(ptr %ptr, i128 %v
 
 define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_acquire:
-; -O0:    subs x8, x8, x10
 ; -O0:    subs x8, x8, x9
+; -O0:    subs x11, x11, x10
 ; -O0:    subs x12, x12, x10
 ; -O0:    csel w11, w8, w11, eq
 ; -O0:    ands w12, w11, #0x1
@@ -8204,8 +8204,8 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acquire(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_release:
-; -O0:    subs x8, x8, x10
 ; -O0:    subs x8, x8, x9
+; -O0:    subs x11, x11, x10
 ; -O0:    subs x12, x12, x10
 ; -O0:    csel w11, w8, w11, eq
 ; -O0:    ands w12, w11, #0x1
@@ -8226,8 +8226,8 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_release(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_acq_rel:
-; -O0:    subs x8, x8, x10
 ; -O0:    subs x8, x8, x9
+; -O0:    subs x11, x11, x10
 ; -O0:    subs x12, x12, x10
 ; -O0:    csel w11, w8, w11, eq
 ; -O0:    ands w12, w11, #0x1
@@ -8248,8 +8248,8 @@ define dso_local i128 @atomicrmw_umax_i128_unaligned_acq_rel(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umax_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umax_i128_unaligned_seq_cst:
-; -O0:    subs x8, x8, x10
 ; -O0:    subs x8, x8, x9
+; -O0:    subs x11, x11, x10
 ; -O0:    subs x12, x12, x10
 ; -O0:    csel w11, w8, w11, eq
 ; -O0:    ands w12, w11, #0x1
@@ -8650,8 +8650,8 @@ define dso_local i64 @atomicrmw_umin_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 
 define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_monotonic:
-; -O0:    subs x8, x8, x9
 ; -O0:    subs x8, x8, x12
+; -O0:    subs x10, x10, x9
 ; -O0:    subs x13, x13, x9
 ; -O0:    csel w10, w8, w10, eq
 ; -O0:    ands w13, w10, #0x1
@@ -8680,8 +8680,8 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_acquire:
-; -O0:    subs x8, x8, x9
 ; -O0:    subs x8, x8, x12
+; -O0:    subs x10, x10, x9
 ; -O0:    subs x13, x13, x9
 ; -O0:    csel w10, w8, w10, eq
 ; -O0:    ands w13, w10, #0x1
@@ -8710,8 +8710,8 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_release:
-; -O0:    subs x8, x8, x9
 ; -O0:    subs x8, x8, x12
+; -O0:    subs x10, x10, x9
 ; -O0:    subs x13, x13, x9
 ; -O0:    csel w10, w8, w10, eq
 ; -O0:    ands w13, w10, #0x1
@@ -8740,8 +8740,8 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_acq_rel:
-; -O0:    subs x8, x8, x9
 ; -O0:    subs x8, x8, x12
+; -O0:    subs x10, x10, x9
 ; -O0:    subs x13, x13, x9
 ; -O0:    csel w10, w8, w10, eq
 ; -O0:    ands w13, w10, #0x1
@@ -8770,8 +8770,8 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value
 
 define dso_local i128 @atomicrmw_umin_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_aligned_seq_cst:
-; -O0:    subs x8, x8, x9
 ; -O0:    subs x8, x8, x12
+; -O0:    subs x10, x10, x9
 ; -O0:    subs x13, x13, x9
 ; -O0:    csel w10, w8, w10, eq
 ; -O0:    ands w13, w10, #0x1
@@ -9120,8 +9120,8 @@ define dso_local i64 @atomicrmw_umin_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 
 define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_monotonic:
-; -O0:    subs x8, x8, x10
 ; -O0:    subs x8, x8, x9
+; -O0:    subs x11, x11, x10
 ; -O0:    subs x12, x12, x10
 ; -O0:    csel w11, w8, w11, eq
 ; -O0:    ands w12, w11, #0x1
@@ -9142,8 +9142,8 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_monotonic(ptr %ptr, i128 %v
 
 define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_acquire:
-; -O0:    subs x8, x8, x10
 ; -O0:    subs x8, x8, x9
+; -O0:    subs x11, x11, x10
 ; -O0:    subs x12, x12, x10
 ; -O0:    csel w11, w8, w11, eq
 ; -O0:    ands w12, w11, #0x1
@@ -9164,8 +9164,8 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_acquire(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_umin_i128_unaligned_release:
-; -O0:    subs x8, x8, x10
 ; -O0:    subs x8, x8, x9
+; -O0:    subs x11, x11, x10
 ; -O0:    subs x12, x12, x10
 ; -O0:    csel w11, w8, w11, eq
 ; -O0:    ands w12, w11, #0x1
@@ -9186,8 +9186,8 @@ define dso_local i128 @atomicrmw_umin_i128_unaligned_release(ptr %ptr, i128 %val
 
 define dso_local i128 @atomicrmw_umin_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ;...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/119335

