[llvm] e632bb6 - [LoongArch] Add codegen support for atomicrmw umin/umax operation on LA64

via llvm-commits <llvm-commits@lists.llvm.org>
Fri Oct 14 00:41:27 PDT 2022


Author: gonglingqin
Date: 2022-10-14T15:24:43+08:00
New Revision: e632bb65436ac8c4625243b53666750bc3eda43a

URL: https://github.com/llvm/llvm-project/commit/e632bb65436ac8c4625243b53666750bc3eda43a
DIFF: https://github.com/llvm/llvm-project/commit/e632bb65436ac8c4625243b53666750bc3eda43a.diff

LOG: [LoongArch] Add codegen support for atomicrmw umin/umax operation on LA64

Furthermore, use `beqz $rd, .BB` instead of `beq $rd, $zero, .BB`.

Differential Revision: https://reviews.llvm.org/D135525
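
For illustration, the net effect on instruction selection (function names below are illustrative, but the behaviour matches the added atomicrmw-minmax.ll tests): 32/64-bit unsigned min/max are selected directly to the `ammin_db.[wu|du]`/`ammax_db.[wu|du]` instructions, while i8/i16 operands are lowered via the new masked intrinsics and expanded by expandAtomicMinMaxOp.

  define i32 @umax_i32_example(ptr %p, i32 %v) nounwind {
    ; On LA64 this now selects ammax_db.wu directly.
    %old = atomicrmw umax ptr %p, i32 %v acquire
    ret i32 %old
  }

  define i8 @umin_i8_example(ptr %p, i8 %v) nounwind {
    ; Sub-word widths expand to a masked ll.w/sc.w loop that compares with
    ; bgeu and retries with beqz (see expandAtomicMinMaxOp below).
    %old = atomicrmw umin ptr %p, i8 %v acquire
    ret i8 %old
  }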

Added: 
    llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll

Modified: 
    llvm/include/llvm/IR/IntrinsicsLoongArch.td
    llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp
    llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
    llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
    llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll

Removed: 
    


################################################################################
diff --git a/llvm/include/llvm/IR/IntrinsicsLoongArch.td b/llvm/include/llvm/IR/IntrinsicsLoongArch.td
index 7b4b0b31e4ff0..9f25b6d87354a 100644
--- a/llvm/include/llvm/IR/IntrinsicsLoongArch.td
+++ b/llvm/include/llvm/IR/IntrinsicsLoongArch.td
@@ -33,4 +33,6 @@ defm int_loongarch_masked_atomicrmw_xchg : MaskedAtomicRMWIntrinsics;
 defm int_loongarch_masked_atomicrmw_add : MaskedAtomicRMWIntrinsics;
 defm int_loongarch_masked_atomicrmw_sub : MaskedAtomicRMWIntrinsics;
 defm int_loongarch_masked_atomicrmw_nand : MaskedAtomicRMWIntrinsics;
+defm int_loongarch_masked_atomicrmw_umax : MaskedAtomicRMWIntrinsics;
+defm int_loongarch_masked_atomicrmw_umin : MaskedAtomicRMWIntrinsics;
 } // TargetPrefix = "loongarch"

diff --git a/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp b/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp
index d89b9ff67e79f..59cb615a02401 100644
--- a/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp
@@ -51,6 +51,10 @@ class LoongArchExpandAtomicPseudo : public MachineFunctionPass {
                          MachineBasicBlock::iterator MBBI, AtomicRMWInst::BinOp,
                          bool IsMasked, int Width,
                          MachineBasicBlock::iterator &NextMBBI);
+  bool expandAtomicMinMaxOp(MachineBasicBlock &MBB,
+                            MachineBasicBlock::iterator MBBI,
+                            AtomicRMWInst::BinOp, bool IsMasked, int Width,
+                            MachineBasicBlock::iterator &NextMBBI);
 };
 
 char LoongArchExpandAtomicPseudo::ID = 0;
@@ -114,6 +118,12 @@ bool LoongArchExpandAtomicPseudo::expandMI(
   case LoongArch::PseudoAtomicLoadXor32:
     return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xor, false, 32,
                              NextMBBI);
+  case LoongArch::PseudoMaskedAtomicLoadUMax32:
+    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMax, true, 32,
+                                NextMBBI);
+  case LoongArch::PseudoMaskedAtomicLoadUMin32:
+    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMin, true, 32,
+                                NextMBBI);
   }
   return false;
 }
@@ -134,7 +144,7 @@ static void doAtomicBinOpExpansion(const LoongArchInstrInfo *TII,
   //   ll.[w|d] dest, (addr)
   //   binop scratch, dest, val
   //   sc.[w|d] scratch, scratch, (addr)
-  //   beq scratch, zero, loop
+  //   beqz scratch, loop
   BuildMI(LoopMBB, DL, TII->get(LoongArch::DBAR)).addImm(0);
   BuildMI(LoopMBB, DL,
           TII->get(Width == 32 ? LoongArch::LL_W : LoongArch::LL_D), DestReg)
@@ -187,9 +197,8 @@ static void doAtomicBinOpExpansion(const LoongArchInstrInfo *TII,
       .addReg(ScratchReg)
       .addReg(AddrReg)
       .addImm(0);
-  BuildMI(LoopMBB, DL, TII->get(LoongArch::BEQ))
+  BuildMI(LoopMBB, DL, TII->get(LoongArch::BEQZ))
       .addReg(ScratchReg)
-      .addReg(LoongArch::R0)
       .addMBB(LoopMBB);
 }
 
@@ -232,7 +241,7 @@ static void doMaskedAtomicBinOpExpansion(
   //   and scratch, scratch, masktargetdata
   //   xor scratch, destreg, scratch
   //   sc.w scratch, scratch, (alignedaddr)
-  //   beq scratch, zero, loop
+  //   beqz scratch, loop
   BuildMI(LoopMBB, DL, TII->get(LoongArch::DBAR)).addImm(0);
   BuildMI(LoopMBB, DL, TII->get(LoongArch::LL_W), DestReg)
       .addReg(AddrReg)
@@ -272,9 +281,8 @@ static void doMaskedAtomicBinOpExpansion(
       .addReg(ScratchReg)
       .addReg(AddrReg)
       .addImm(0);
-  BuildMI(LoopMBB, DL, TII->get(LoongArch::BEQ))
+  BuildMI(LoopMBB, DL, TII->get(LoongArch::BEQZ))
       .addReg(ScratchReg)
-      .addReg(LoongArch::R0)
       .addMBB(LoopMBB);
 }
 
@@ -316,6 +324,114 @@ bool LoongArchExpandAtomicPseudo::expandAtomicBinOp(
   return true;
 }
 
+bool LoongArchExpandAtomicPseudo::expandAtomicMinMaxOp(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+    AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width,
+    MachineBasicBlock::iterator &NextMBBI) {
+  assert(IsMasked == true &&
+         "Should only need to expand masked atomic max/min");
+  assert(Width == 32 && "Should never need to expand masked 64-bit operations");
+
+  MachineInstr &MI = *MBBI;
+  DebugLoc DL = MI.getDebugLoc();
+  MachineFunction *MF = MBB.getParent();
+  auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+  auto LoopIfBodyMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+  auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+  auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+
+  // Insert new MBBs.
+  MF->insert(++MBB.getIterator(), LoopHeadMBB);
+  MF->insert(++LoopHeadMBB->getIterator(), LoopIfBodyMBB);
+  MF->insert(++LoopIfBodyMBB->getIterator(), LoopTailMBB);
+  MF->insert(++LoopTailMBB->getIterator(), DoneMBB);
+
+  // Set up successors and transfer remaining instructions to DoneMBB.
+  LoopHeadMBB->addSuccessor(LoopIfBodyMBB);
+  LoopHeadMBB->addSuccessor(LoopTailMBB);
+  LoopIfBodyMBB->addSuccessor(LoopTailMBB);
+  LoopTailMBB->addSuccessor(LoopHeadMBB);
+  LoopTailMBB->addSuccessor(DoneMBB);
+  DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
+  DoneMBB->transferSuccessors(&MBB);
+  MBB.addSuccessor(LoopHeadMBB);
+
+  Register DestReg = MI.getOperand(0).getReg();
+  Register Scratch1Reg = MI.getOperand(1).getReg();
+  Register Scratch2Reg = MI.getOperand(2).getReg();
+  Register AddrReg = MI.getOperand(3).getReg();
+  Register IncrReg = MI.getOperand(4).getReg();
+  Register MaskReg = MI.getOperand(5).getReg();
+
+  //
+  // .loophead:
+  //   dbar 0
+  //   ll.w destreg, (alignedaddr)
+  //   and scratch2, destreg, mask
+  //   move scratch1, destreg
+  BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::DBAR)).addImm(0);
+  BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::LL_W), DestReg)
+      .addReg(AddrReg)
+      .addImm(0);
+  BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::AND), Scratch2Reg)
+      .addReg(DestReg)
+      .addReg(MaskReg);
+  BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::OR), Scratch1Reg)
+      .addReg(DestReg)
+      .addReg(LoongArch::R0);
+
+  switch (BinOp) {
+  default:
+    llvm_unreachable("Unexpected AtomicRMW BinOp");
+  // bgeu scratch2, incr, .looptail
+  case AtomicRMWInst::UMax:
+    BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::BGEU))
+        .addReg(Scratch2Reg)
+        .addReg(IncrReg)
+        .addMBB(LoopTailMBB);
+    break;
+  // bgeu incr, scratch2, .looptail
+  case AtomicRMWInst::UMin:
+    BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::BGEU))
+        .addReg(IncrReg)
+        .addReg(Scratch2Reg)
+        .addMBB(LoopTailMBB);
+    break;
+    // TODO: support other AtomicRMWInst.
+  }
+
+  // .loopifbody:
+  //   xor scratch1, destreg, incr
+  //   and scratch1, scratch1, mask
+  //   xor scratch1, destreg, scratch1
+  insertMaskedMerge(TII, DL, LoopIfBodyMBB, Scratch1Reg, DestReg, IncrReg,
+                    MaskReg, Scratch1Reg);
+
+  // .looptail:
+  //   sc.w scratch1, scratch1, (addr)
+  //   beqz scratch1, loop
+  //   dbar 0x700
+  BuildMI(LoopTailMBB, DL, TII->get(LoongArch::SC_W), Scratch1Reg)
+      .addReg(Scratch1Reg)
+      .addReg(AddrReg)
+      .addImm(0);
+  BuildMI(LoopTailMBB, DL, TII->get(LoongArch::BEQZ))
+      .addReg(Scratch1Reg)
+      .addMBB(LoopHeadMBB);
+  BuildMI(LoopTailMBB, DL, TII->get(LoongArch::DBAR)).addImm(0x700);
+
+  NextMBBI = MBB.end();
+  MI.eraseFromParent();
+
+  LivePhysRegs LiveRegs;
+  computeAndAddLiveIns(LiveRegs, *LoopHeadMBB);
+  computeAndAddLiveIns(LiveRegs, *LoopIfBodyMBB);
+  computeAndAddLiveIns(LiveRegs, *LoopTailMBB);
+  computeAndAddLiveIns(LiveRegs, *DoneMBB);
+
+  return true;
+}
+
 } // end namespace
 
 INITIALIZE_PASS(LoongArchExpandAtomicPseudo, "loongarch-expand-atomic-pseudo",

diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 51ab8696391ee..2dd677898a882 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -2007,6 +2007,10 @@ getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen,
       return Intrinsic::loongarch_masked_atomicrmw_sub_i64;
     case AtomicRMWInst::Nand:
       return Intrinsic::loongarch_masked_atomicrmw_nand_i64;
+    case AtomicRMWInst::UMax:
+      return Intrinsic::loongarch_masked_atomicrmw_umax_i64;
+    case AtomicRMWInst::UMin:
+      return Intrinsic::loongarch_masked_atomicrmw_umin_i64;
       // TODO: support other AtomicRMWInst.
     }
   }

diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
index d52968bccc818..880070846429a 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
@@ -1158,6 +1158,20 @@ def PseudoAtomicLoadAnd32 : PseudoAM;
 def PseudoAtomicLoadOr32 : PseudoAM;
 def PseudoAtomicLoadXor32 : PseudoAM;
 
+
+class PseudoMaskedAMUMinUMax
+    : Pseudo<(outs GPR:$res, GPR:$scratch1, GPR:$scratch2),
+             (ins GPR:$addr, GPR:$incr, GPR:$mask, grlenimm:$ordering), []> {
+  let Constraints = "@earlyclobber $res,@earlyclobber $scratch1,"
+                    "@earlyclobber $scratch2";
+  let mayLoad = 1;
+  let mayStore = 1;
+  let hasSideEffects = 0;
+}
+
+def PseudoMaskedAtomicLoadUMax32 : PseudoMaskedAMUMinUMax;
+def PseudoMaskedAtomicLoadUMin32 : PseudoMaskedAMUMinUMax;
+
 class AtomicPat<Intrinsic intrin, Pseudo AMInst>
     : Pat<(intrin GPR:$addr, GPR:$incr, GPR:$mask, timm:$ordering),
           (AMInst GPR:$addr, GPR:$incr, GPR:$mask, timm:$ordering)>;
@@ -1197,6 +1211,20 @@ def : Pat<(atomic_load_xor_32 GPR:$rj, GPR:$rk),
           (AMXOR_DB_W GPR:$rk, GPR:$rj)>;
 def : Pat<(atomic_load_xor_64 GPR:$rj, GPR:$rk),
           (AMXOR_DB_D GPR:$rk, GPR:$rj)>;
+
+def : Pat<(atomic_load_umin_32 GPR:$rj, GPR:$rk),
+          (AMMIN_DB_WU GPR:$rk, GPR:$rj)>;
+def : Pat<(atomic_load_umin_64 GPR:$rj, GPR:$rk),
+          (AMMIN_DB_DU GPR:$rk, GPR:$rj)>;
+def : Pat<(atomic_load_umax_32 GPR:$rj, GPR:$rk),
+          (AMMAX_DB_WU GPR:$rk, GPR:$rj)>;
+def : Pat<(atomic_load_umax_64 GPR:$rj, GPR:$rk),
+          (AMMAX_DB_DU GPR:$rk, GPR:$rj)>;
+
+def : AtomicPat<int_loongarch_masked_atomicrmw_umax_i64,
+                PseudoMaskedAtomicLoadUMax32>;
+def : AtomicPat<int_loongarch_masked_atomicrmw_umin_i64,
+                PseudoMaskedAtomicLoadUMin32>;
 } // Predicates = [IsLA64]
 
 def : Pat<(atomic_load_nand_32 GPR:$rj, GPR:$rk),

diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll
new file mode 100644
index 0000000000000..fcbed0edb3d16
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll
@@ -0,0 +1,178 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64
+
+;; TODO: Testing for LA32 architecture will be added later
+
+define i8 @atomicrmw_umax_i8_acquire(ptr %a, i8 %b) nounwind {
+; LA64-LABEL: atomicrmw_umax_i8_acquire:
+; LA64:       # %bb.0:
+; LA64-NEXT:    addi.w $a2, $zero, -4
+; LA64-NEXT:    and $a2, $a0, $a2
+; LA64-NEXT:    slli.d $a0, $a0, 3
+; LA64-NEXT:    ori $a3, $zero, 255
+; LA64-NEXT:    sll.w $a3, $a3, $a0
+; LA64-NEXT:    addi.w $a3, $a3, 0
+; LA64-NEXT:    andi $a1, $a1, 255
+; LA64-NEXT:    sll.w $a1, $a1, $a0
+; LA64-NEXT:    addi.w $a1, $a1, 0
+; LA64-NEXT:  .LBB0_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT:    dbar 0
+; LA64-NEXT:    ll.w $a4, $a2, 0
+; LA64-NEXT:    and $a6, $a4, $a3
+; LA64-NEXT:    move $a5, $a4
+; LA64-NEXT:    bgeu $a6, $a1, .LBB0_3
+; LA64-NEXT:  # %bb.2: # in Loop: Header=BB0_1 Depth=1
+; LA64-NEXT:    xor $a5, $a4, $a1
+; LA64-NEXT:    and $a5, $a5, $a3
+; LA64-NEXT:    xor $a5, $a4, $a5
+; LA64-NEXT:  .LBB0_3: # in Loop: Header=BB0_1 Depth=1
+; LA64-NEXT:    sc.w $a5, $a2, 0
+; LA64-NEXT:    beqz $a5, .LBB0_1
+; LA64-NEXT:    dbar 1792
+; LA64-NEXT:  # %bb.4:
+; LA64-NEXT:    srl.w $a0, $a4, $a0
+; LA64-NEXT:    ret
+  %1 = atomicrmw umax ptr %a, i8 %b acquire
+  ret i8 %1
+}
+
+define i16 @atomicrmw_umax_i16_acquire(ptr %a, i16 %b) nounwind {
+; LA64-LABEL: atomicrmw_umax_i16_acquire:
+; LA64:       # %bb.0:
+; LA64-NEXT:    addi.w $a2, $zero, -4
+; LA64-NEXT:    and $a2, $a0, $a2
+; LA64-NEXT:    slli.d $a0, $a0, 3
+; LA64-NEXT:    lu12i.w $a3, 15
+; LA64-NEXT:    ori $a3, $a3, 4095
+; LA64-NEXT:    sll.w $a3, $a3, $a0
+; LA64-NEXT:    addi.w $a3, $a3, 0
+; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT:    sll.w $a1, $a1, $a0
+; LA64-NEXT:    addi.w $a1, $a1, 0
+; LA64-NEXT:  .LBB1_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT:    dbar 0
+; LA64-NEXT:    ll.w $a4, $a2, 0
+; LA64-NEXT:    and $a6, $a4, $a3
+; LA64-NEXT:    move $a5, $a4
+; LA64-NEXT:    bgeu $a6, $a1, .LBB1_3
+; LA64-NEXT:  # %bb.2: # in Loop: Header=BB1_1 Depth=1
+; LA64-NEXT:    xor $a5, $a4, $a1
+; LA64-NEXT:    and $a5, $a5, $a3
+; LA64-NEXT:    xor $a5, $a4, $a5
+; LA64-NEXT:  .LBB1_3: # in Loop: Header=BB1_1 Depth=1
+; LA64-NEXT:    sc.w $a5, $a2, 0
+; LA64-NEXT:    beqz $a5, .LBB1_1
+; LA64-NEXT:    dbar 1792
+; LA64-NEXT:  # %bb.4:
+; LA64-NEXT:    srl.w $a0, $a4, $a0
+; LA64-NEXT:    ret
+  %1 = atomicrmw umax ptr %a, i16 %b acquire
+  ret i16 %1
+}
+
+define i32 @atomicrmw_umax_i32_acquire(ptr %a, i32 %b) nounwind {
+; LA64-LABEL: atomicrmw_umax_i32_acquire:
+; LA64:       # %bb.0:
+; LA64-NEXT:    ammax_db.wu $a2, $a1, $a0
+; LA64-NEXT:    move $a0, $a2
+; LA64-NEXT:    ret
+  %1 = atomicrmw umax ptr %a, i32 %b acquire
+  ret i32 %1
+}
+
+define i64 @atomicrmw_umax_i64_acquire(ptr %a, i64 %b) nounwind {
+; LA64-LABEL: atomicrmw_umax_i64_acquire:
+; LA64:       # %bb.0:
+; LA64-NEXT:    ammax_db.du $a2, $a1, $a0
+; LA64-NEXT:    move $a0, $a2
+; LA64-NEXT:    ret
+  %1 = atomicrmw umax ptr %a, i64 %b acquire
+  ret i64 %1
+}
+
+define i8 @atomicrmw_umin_i8_acquire(ptr %a, i8 %b) nounwind {
+; LA64-LABEL: atomicrmw_umin_i8_acquire:
+; LA64:       # %bb.0:
+; LA64-NEXT:    addi.w $a2, $zero, -4
+; LA64-NEXT:    and $a2, $a0, $a2
+; LA64-NEXT:    slli.d $a0, $a0, 3
+; LA64-NEXT:    ori $a3, $zero, 255
+; LA64-NEXT:    sll.w $a3, $a3, $a0
+; LA64-NEXT:    addi.w $a3, $a3, 0
+; LA64-NEXT:    andi $a1, $a1, 255
+; LA64-NEXT:    sll.w $a1, $a1, $a0
+; LA64-NEXT:    addi.w $a1, $a1, 0
+; LA64-NEXT:  .LBB4_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT:    dbar 0
+; LA64-NEXT:    ll.w $a4, $a2, 0
+; LA64-NEXT:    and $a6, $a4, $a3
+; LA64-NEXT:    move $a5, $a4
+; LA64-NEXT:    bgeu $a1, $a6, .LBB4_3
+; LA64-NEXT:  # %bb.2: # in Loop: Header=BB4_1 Depth=1
+; LA64-NEXT:    xor $a5, $a4, $a1
+; LA64-NEXT:    and $a5, $a5, $a3
+; LA64-NEXT:    xor $a5, $a4, $a5
+; LA64-NEXT:  .LBB4_3: # in Loop: Header=BB4_1 Depth=1
+; LA64-NEXT:    sc.w $a5, $a2, 0
+; LA64-NEXT:    beqz $a5, .LBB4_1
+; LA64-NEXT:    dbar 1792
+; LA64-NEXT:  # %bb.4:
+; LA64-NEXT:    srl.w $a0, $a4, $a0
+; LA64-NEXT:    ret
+  %1 = atomicrmw umin ptr %a, i8 %b acquire
+  ret i8 %1
+}
+
+define i16 @atomicrmw_umin_i16_acquire(ptr %a, i16 %b) nounwind {
+; LA64-LABEL: atomicrmw_umin_i16_acquire:
+; LA64:       # %bb.0:
+; LA64-NEXT:    addi.w $a2, $zero, -4
+; LA64-NEXT:    and $a2, $a0, $a2
+; LA64-NEXT:    slli.d $a0, $a0, 3
+; LA64-NEXT:    lu12i.w $a3, 15
+; LA64-NEXT:    ori $a3, $a3, 4095
+; LA64-NEXT:    sll.w $a3, $a3, $a0
+; LA64-NEXT:    addi.w $a3, $a3, 0
+; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT:    sll.w $a1, $a1, $a0
+; LA64-NEXT:    addi.w $a1, $a1, 0
+; LA64-NEXT:  .LBB5_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT:    dbar 0
+; LA64-NEXT:    ll.w $a4, $a2, 0
+; LA64-NEXT:    and $a6, $a4, $a3
+; LA64-NEXT:    move $a5, $a4
+; LA64-NEXT:    bgeu $a1, $a6, .LBB5_3
+; LA64-NEXT:  # %bb.2: # in Loop: Header=BB5_1 Depth=1
+; LA64-NEXT:    xor $a5, $a4, $a1
+; LA64-NEXT:    and $a5, $a5, $a3
+; LA64-NEXT:    xor $a5, $a4, $a5
+; LA64-NEXT:  .LBB5_3: # in Loop: Header=BB5_1 Depth=1
+; LA64-NEXT:    sc.w $a5, $a2, 0
+; LA64-NEXT:    beqz $a5, .LBB5_1
+; LA64-NEXT:    dbar 1792
+; LA64-NEXT:  # %bb.4:
+; LA64-NEXT:    srl.w $a0, $a4, $a0
+; LA64-NEXT:    ret
+  %1 = atomicrmw umin ptr %a, i16 %b acquire
+  ret i16 %1
+}
+
+define i32 @atomicrmw_umin_i32_acquire(ptr %a, i32 %b) nounwind {
+; LA64-LABEL: atomicrmw_umin_i32_acquire:
+; LA64:       # %bb.0:
+; LA64-NEXT:    ammin_db.wu $a2, $a1, $a0
+; LA64-NEXT:    move $a0, $a2
+; LA64-NEXT:    ret
+  %1 = atomicrmw umin ptr %a, i32 %b acquire
+  ret i32 %1
+}
+
+define i64 @atomicrmw_umin_i64_acquire(ptr %a, i64 %b) nounwind {
+; LA64-LABEL: atomicrmw_umin_i64_acquire:
+; LA64:       # %bb.0:
+; LA64-NEXT:    ammin_db.du $a2, $a1, $a0
+; LA64-NEXT:    move $a0, $a2
+; LA64-NEXT:    ret
+  %1 = atomicrmw umin ptr %a, i64 %b acquire
+  ret i64 %1
+}

diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll
index 5f9695cc792b6..9761a402f9433 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll
@@ -20,7 +20,7 @@ define i8 @atomicrmw_xchg_i8_acquire(ptr %a, i8 %b) nounwind {
 ; LA32-NEXT:    and $a5, $a5, $a3
 ; LA32-NEXT:    xor $a5, $a4, $a5
 ; LA32-NEXT:    sc.w $a5, $a2, 0
-; LA32-NEXT:    beq $a5, $zero, .LBB0_1
+; LA32-NEXT:    beqz $a5, .LBB0_1
 ; LA32-NEXT:  # %bb.2:
 ; LA32-NEXT:    srl.w $a0, $a4, $a0
 ; LA32-NEXT:    ret
@@ -44,7 +44,7 @@ define i8 @atomicrmw_xchg_i8_acquire(ptr %a, i8 %b) nounwind {
 ; LA64-NEXT:    and $a5, $a5, $a3
 ; LA64-NEXT:    xor $a5, $a4, $a5
 ; LA64-NEXT:    sc.w $a5, $a2, 0
-; LA64-NEXT:    beq $a5, $zero, .LBB0_1
+; LA64-NEXT:    beqz $a5, .LBB0_1
 ; LA64-NEXT:  # %bb.2:
 ; LA64-NEXT:    srl.w $a0, $a4, $a0
 ; LA64-NEXT:    ret
@@ -71,7 +71,7 @@ define i16 @atomicrmw_xchg_i16_acquire(ptr %a, i16 %b) nounwind {
 ; LA32-NEXT:    and $a5, $a5, $a3
 ; LA32-NEXT:    xor $a5, $a4, $a5
 ; LA32-NEXT:    sc.w $a5, $a2, 0
-; LA32-NEXT:    beq $a5, $zero, .LBB1_1
+; LA32-NEXT:    beqz $a5, .LBB1_1
 ; LA32-NEXT:  # %bb.2:
 ; LA32-NEXT:    srl.w $a0, $a4, $a0
 ; LA32-NEXT:    ret
@@ -96,7 +96,7 @@ define i16 @atomicrmw_xchg_i16_acquire(ptr %a, i16 %b) nounwind {
 ; LA64-NEXT:    and $a5, $a5, $a3
 ; LA64-NEXT:    xor $a5, $a4, $a5
 ; LA64-NEXT:    sc.w $a5, $a2, 0
-; LA64-NEXT:    beq $a5, $zero, .LBB1_1
+; LA64-NEXT:    beqz $a5, .LBB1_1
 ; LA64-NEXT:  # %bb.2:
 ; LA64-NEXT:    srl.w $a0, $a4, $a0
 ; LA64-NEXT:    ret
@@ -112,7 +112,7 @@ define i32 @atomicrmw_xchg_i32_acquire(ptr %a, i32 %b) nounwind {
 ; LA32-NEXT:    ll.w $a2, $a1, 0
 ; LA32-NEXT:    move $a3, $a0
 ; LA32-NEXT:    sc.w $a3, $a1, 0
-; LA32-NEXT:    beq $a3, $zero, .LBB2_1
+; LA32-NEXT:    beqz $a3, .LBB2_1
 ; LA32-NEXT:  # %bb.2:
 ; LA32-NEXT:    move $a0, $a2
 ; LA32-NEXT:    ret
@@ -164,7 +164,7 @@ define i8 @atomicrmw_add_i8_acquire(ptr %a, i8 %b) nounwind {
 ; LA32-NEXT:    and $a5, $a5, $a3
 ; LA32-NEXT:    xor $a5, $a4, $a5
 ; LA32-NEXT:    sc.w $a5, $a2, 0
-; LA32-NEXT:    beq $a5, $zero, .LBB4_1
+; LA32-NEXT:    beqz $a5, .LBB4_1
 ; LA32-NEXT:  # %bb.2:
 ; LA32-NEXT:    srl.w $a0, $a4, $a0
 ; LA32-NEXT:    ret
@@ -188,7 +188,7 @@ define i8 @atomicrmw_add_i8_acquire(ptr %a, i8 %b) nounwind {
 ; LA64-NEXT:    and $a5, $a5, $a3
 ; LA64-NEXT:    xor $a5, $a4, $a5
 ; LA64-NEXT:    sc.w $a5, $a2, 0
-; LA64-NEXT:    beq $a5, $zero, .LBB4_1
+; LA64-NEXT:    beqz $a5, .LBB4_1
 ; LA64-NEXT:  # %bb.2:
 ; LA64-NEXT:    srl.w $a0, $a4, $a0
 ; LA64-NEXT:    ret
@@ -215,7 +215,7 @@ define i16 @atomicrmw_add_i16_acquire(ptr %a, i16 %b) nounwind {
 ; LA32-NEXT:    and $a5, $a5, $a3
 ; LA32-NEXT:    xor $a5, $a4, $a5
 ; LA32-NEXT:    sc.w $a5, $a2, 0
-; LA32-NEXT:    beq $a5, $zero, .LBB5_1
+; LA32-NEXT:    beqz $a5, .LBB5_1
 ; LA32-NEXT:  # %bb.2:
 ; LA32-NEXT:    srl.w $a0, $a4, $a0
 ; LA32-NEXT:    ret
@@ -240,7 +240,7 @@ define i16 @atomicrmw_add_i16_acquire(ptr %a, i16 %b) nounwind {
 ; LA64-NEXT:    and $a5, $a5, $a3
 ; LA64-NEXT:    xor $a5, $a4, $a5
 ; LA64-NEXT:    sc.w $a5, $a2, 0
-; LA64-NEXT:    beq $a5, $zero, .LBB5_1
+; LA64-NEXT:    beqz $a5, .LBB5_1
 ; LA64-NEXT:  # %bb.2:
 ; LA64-NEXT:    srl.w $a0, $a4, $a0
 ; LA64-NEXT:    ret
@@ -256,7 +256,7 @@ define i32 @atomicrmw_add_i32_acquire(ptr %a, i32 %b) nounwind {
 ; LA32-NEXT:    ll.w $a2, $a1, 0
 ; LA32-NEXT:    add.w $a3, $a2, $a0
 ; LA32-NEXT:    sc.w $a3, $a1, 0
-; LA32-NEXT:    beq $a3, $zero, .LBB6_1
+; LA32-NEXT:    beqz $a3, .LBB6_1
 ; LA32-NEXT:  # %bb.2:
 ; LA32-NEXT:    move $a0, $a2
 ; LA32-NEXT:    ret
@@ -308,7 +308,7 @@ define i8 @atomicrmw_sub_i8_acquire(ptr %a, i8 %b) nounwind {
 ; LA32-NEXT:    and $a5, $a5, $a3
 ; LA32-NEXT:    xor $a5, $a4, $a5
 ; LA32-NEXT:    sc.w $a5, $a2, 0
-; LA32-NEXT:    beq $a5, $zero, .LBB8_1
+; LA32-NEXT:    beqz $a5, .LBB8_1
 ; LA32-NEXT:  # %bb.2:
 ; LA32-NEXT:    srl.w $a0, $a4, $a0
 ; LA32-NEXT:    ret
@@ -332,7 +332,7 @@ define i8 @atomicrmw_sub_i8_acquire(ptr %a, i8 %b) nounwind {
 ; LA64-NEXT:    and $a5, $a5, $a3
 ; LA64-NEXT:    xor $a5, $a4, $a5
 ; LA64-NEXT:    sc.w $a5, $a2, 0
-; LA64-NEXT:    beq $a5, $zero, .LBB8_1
+; LA64-NEXT:    beqz $a5, .LBB8_1
 ; LA64-NEXT:  # %bb.2:
 ; LA64-NEXT:    srl.w $a0, $a4, $a0
 ; LA64-NEXT:    ret
@@ -359,7 +359,7 @@ define i16 @atomicrmw_sub_i16_acquire(ptr %a, i16 %b) nounwind {
 ; LA32-NEXT:    and $a5, $a5, $a3
 ; LA32-NEXT:    xor $a5, $a4, $a5
 ; LA32-NEXT:    sc.w $a5, $a2, 0
-; LA32-NEXT:    beq $a5, $zero, .LBB9_1
+; LA32-NEXT:    beqz $a5, .LBB9_1
 ; LA32-NEXT:  # %bb.2:
 ; LA32-NEXT:    srl.w $a0, $a4, $a0
 ; LA32-NEXT:    ret
@@ -384,7 +384,7 @@ define i16 @atomicrmw_sub_i16_acquire(ptr %a, i16 %b) nounwind {
 ; LA64-NEXT:    and $a5, $a5, $a3
 ; LA64-NEXT:    xor $a5, $a4, $a5
 ; LA64-NEXT:    sc.w $a5, $a2, 0
-; LA64-NEXT:    beq $a5, $zero, .LBB9_1
+; LA64-NEXT:    beqz $a5, .LBB9_1
 ; LA64-NEXT:  # %bb.2:
 ; LA64-NEXT:    srl.w $a0, $a4, $a0
 ; LA64-NEXT:    ret
@@ -400,7 +400,7 @@ define i32 @atomicrmw_sub_i32_acquire(ptr %a, i32 %b) nounwind {
 ; LA32-NEXT:    ll.w $a2, $a1, 0
 ; LA32-NEXT:    sub.w $a3, $a2, $a0
 ; LA32-NEXT:    sc.w $a3, $a1, 0
-; LA32-NEXT:    beq $a3, $zero, .LBB10_1
+; LA32-NEXT:    beqz $a3, .LBB10_1
 ; LA32-NEXT:  # %bb.2:
 ; LA32-NEXT:    move $a0, $a2
 ; LA32-NEXT:    ret
@@ -455,7 +455,7 @@ define i8 @atomicrmw_nand_i8_acquire(ptr %a, i8 %b) nounwind {
 ; LA32-NEXT:    and $a5, $a5, $a3
 ; LA32-NEXT:    xor $a5, $a4, $a5
 ; LA32-NEXT:    sc.w $a5, $a2, 0
-; LA32-NEXT:    beq $a5, $zero, .LBB12_1
+; LA32-NEXT:    beqz $a5, .LBB12_1
 ; LA32-NEXT:  # %bb.2:
 ; LA32-NEXT:    srl.w $a0, $a4, $a0
 ; LA32-NEXT:    ret
@@ -480,7 +480,7 @@ define i8 @atomicrmw_nand_i8_acquire(ptr %a, i8 %b) nounwind {
 ; LA64-NEXT:    and $a5, $a5, $a3
 ; LA64-NEXT:    xor $a5, $a4, $a5
 ; LA64-NEXT:    sc.w $a5, $a2, 0
-; LA64-NEXT:    beq $a5, $zero, .LBB12_1
+; LA64-NEXT:    beqz $a5, .LBB12_1
 ; LA64-NEXT:  # %bb.2:
 ; LA64-NEXT:    srl.w $a0, $a4, $a0
 ; LA64-NEXT:    ret
@@ -508,7 +508,7 @@ define i16 @atomicrmw_nand_i16_acquire(ptr %a, i16 %b) nounwind {
 ; LA32-NEXT:    and $a5, $a5, $a3
 ; LA32-NEXT:    xor $a5, $a4, $a5
 ; LA32-NEXT:    sc.w $a5, $a2, 0
-; LA32-NEXT:    beq $a5, $zero, .LBB13_1
+; LA32-NEXT:    beqz $a5, .LBB13_1
 ; LA32-NEXT:  # %bb.2:
 ; LA32-NEXT:    srl.w $a0, $a4, $a0
 ; LA32-NEXT:    ret
@@ -534,7 +534,7 @@ define i16 @atomicrmw_nand_i16_acquire(ptr %a, i16 %b) nounwind {
 ; LA64-NEXT:    and $a5, $a5, $a3
 ; LA64-NEXT:    xor $a5, $a4, $a5
 ; LA64-NEXT:    sc.w $a5, $a2, 0
-; LA64-NEXT:    beq $a5, $zero, .LBB13_1
+; LA64-NEXT:    beqz $a5, .LBB13_1
 ; LA64-NEXT:  # %bb.2:
 ; LA64-NEXT:    srl.w $a0, $a4, $a0
 ; LA64-NEXT:    ret
@@ -551,7 +551,7 @@ define i32 @atomicrmw_nand_i32_acquire(ptr %a, i32 %b) nounwind {
 ; LA32-NEXT:    and $a3, $a2, $a0
 ; LA32-NEXT:    xori $a3, $a3, -1
 ; LA32-NEXT:    sc.w $a3, $a1, 0
-; LA32-NEXT:    beq $a3, $zero, .LBB14_1
+; LA32-NEXT:    beqz $a3, .LBB14_1
 ; LA32-NEXT:  # %bb.2:
 ; LA32-NEXT:    move $a0, $a2
 ; LA32-NEXT:    ret
@@ -564,7 +564,7 @@ define i32 @atomicrmw_nand_i32_acquire(ptr %a, i32 %b) nounwind {
 ; LA64-NEXT:    and $a3, $a2, $a0
 ; LA64-NEXT:    xori $a3, $a3, -1
 ; LA64-NEXT:    sc.w $a3, $a1, 0
-; LA64-NEXT:    beq $a3, $zero, .LBB14_1
+; LA64-NEXT:    beqz $a3, .LBB14_1
 ; LA64-NEXT:  # %bb.2:
 ; LA64-NEXT:    move $a0, $a2
 ; LA64-NEXT:    ret
@@ -591,7 +591,7 @@ define i64 @atomicrmw_nand_i64_acquire(ptr %a, i64 %b) nounwind {
 ; LA64-NEXT:    and $a3, $a2, $a0
 ; LA64-NEXT:    xori $a3, $a3, -1
 ; LA64-NEXT:    sc.d $a3, $a1, 0
-; LA64-NEXT:    beq $a3, $zero, .LBB15_1
+; LA64-NEXT:    beqz $a3, .LBB15_1
 ; LA64-NEXT:  # %bb.2:
 ; LA64-NEXT:    move $a0, $a2
 ; LA64-NEXT:    ret
@@ -615,7 +615,7 @@ define i8 @atomicrmw_and_i8_acquire(ptr %a, i8 %b) nounwind {
 ; LA32-NEXT:    ll.w $a3, $a1, 0
 ; LA32-NEXT:    and $a4, $a3, $a0
 ; LA32-NEXT:    sc.w $a4, $a1, 0
-; LA32-NEXT:    beq $a4, $zero, .LBB16_1
+; LA32-NEXT:    beqz $a4, .LBB16_1
 ; LA32-NEXT:  # %bb.2:
 ; LA32-NEXT:    srl.w $a0, $a3, $a2
 ; LA32-NEXT:    ret
@@ -654,7 +654,7 @@ define i16 @atomicrmw_and_i16_acquire(ptr %a, i16 %b) nounwind {
 ; LA32-NEXT:    ll.w $a2, $a1, 0
 ; LA32-NEXT:    and $a4, $a2, $a0
 ; LA32-NEXT:    sc.w $a4, $a1, 0
-; LA32-NEXT:    beq $a4, $zero, .LBB17_1
+; LA32-NEXT:    beqz $a4, .LBB17_1
 ; LA32-NEXT:  # %bb.2:
 ; LA32-NEXT:    srl.w $a0, $a2, $a3
 ; LA32-NEXT:    ret
@@ -685,7 +685,7 @@ define i32 @atomicrmw_and_i32_acquire(ptr %a, i32 %b) nounwind {
 ; LA32-NEXT:    ll.w $a2, $a1, 0
 ; LA32-NEXT:    and $a3, $a2, $a0
 ; LA32-NEXT:    sc.w $a3, $a1, 0
-; LA32-NEXT:    beq $a3, $zero, .LBB18_1
+; LA32-NEXT:    beqz $a3, .LBB18_1
 ; LA32-NEXT:  # %bb.2:
 ; LA32-NEXT:    move $a0, $a2
 ; LA32-NEXT:    ret
@@ -732,7 +732,7 @@ define i8 @atomicrmw_or_i8_acquire(ptr %a, i8 %b) nounwind {
 ; LA32-NEXT:    ll.w $a3, $a1, 0
 ; LA32-NEXT:    or $a4, $a3, $a2
 ; LA32-NEXT:    sc.w $a4, $a1, 0
-; LA32-NEXT:    beq $a4, $zero, .LBB20_1
+; LA32-NEXT:    beqz $a4, .LBB20_1
 ; LA32-NEXT:  # %bb.2:
 ; LA32-NEXT:    srl.w $a0, $a3, $a0
 ; LA32-NEXT:    ret
@@ -764,7 +764,7 @@ define i16 @atomicrmw_or_i16_acquire(ptr %a, i16 %b) nounwind {
 ; LA32-NEXT:    ll.w $a3, $a1, 0
 ; LA32-NEXT:    or $a4, $a3, $a2
 ; LA32-NEXT:    sc.w $a4, $a1, 0
-; LA32-NEXT:    beq $a4, $zero, .LBB21_1
+; LA32-NEXT:    beqz $a4, .LBB21_1
 ; LA32-NEXT:  # %bb.2:
 ; LA32-NEXT:    srl.w $a0, $a3, $a0
 ; LA32-NEXT:    ret
@@ -791,7 +791,7 @@ define i32 @atomicrmw_or_i32_acquire(ptr %a, i32 %b) nounwind {
 ; LA32-NEXT:    ll.w $a2, $a1, 0
 ; LA32-NEXT:    or $a3, $a2, $a0
 ; LA32-NEXT:    sc.w $a3, $a1, 0
-; LA32-NEXT:    beq $a3, $zero, .LBB22_1
+; LA32-NEXT:    beqz $a3, .LBB22_1
 ; LA32-NEXT:  # %bb.2:
 ; LA32-NEXT:    move $a0, $a2
 ; LA32-NEXT:    ret
@@ -838,7 +838,7 @@ define i8 @atomicrmw_xor_i8_acquire(ptr %a, i8 %b) nounwind {
 ; LA32-NEXT:    ll.w $a3, $a1, 0
 ; LA32-NEXT:    xor $a4, $a3, $a2
 ; LA32-NEXT:    sc.w $a4, $a1, 0
-; LA32-NEXT:    beq $a4, $zero, .LBB24_1
+; LA32-NEXT:    beqz $a4, .LBB24_1
 ; LA32-NEXT:  # %bb.2:
 ; LA32-NEXT:    srl.w $a0, $a3, $a0
 ; LA32-NEXT:    ret
@@ -870,7 +870,7 @@ define i16 @atomicrmw_xor_i16_acquire(ptr %a, i16 %b) nounwind {
 ; LA32-NEXT:    ll.w $a3, $a1, 0
 ; LA32-NEXT:    xor $a4, $a3, $a2
 ; LA32-NEXT:    sc.w $a4, $a1, 0
-; LA32-NEXT:    beq $a4, $zero, .LBB25_1
+; LA32-NEXT:    beqz $a4, .LBB25_1
 ; LA32-NEXT:  # %bb.2:
 ; LA32-NEXT:    srl.w $a0, $a3, $a0
 ; LA32-NEXT:    ret
@@ -897,7 +897,7 @@ define i32 @atomicrmw_xor_i32_acquire(ptr %a, i32 %b) nounwind {
 ; LA32-NEXT:    ll.w $a2, $a1, 0
 ; LA32-NEXT:    xor $a3, $a2, $a0
 ; LA32-NEXT:    sc.w $a3, $a1, 0
-; LA32-NEXT:    beq $a3, $zero, .LBB26_1
+; LA32-NEXT:    beqz $a3, .LBB26_1
 ; LA32-NEXT:  # %bb.2:
 ; LA32-NEXT:    move $a0, $a2
 ; LA32-NEXT:    ret
