[llvm] e632bb6 - [LoongArch] Add codegen support for atomicrmw umin/umax operation on LA64
via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 14 00:41:27 PDT 2022
Author: gonglingqin
Date: 2022-10-14T15:24:43+08:00
New Revision: e632bb65436ac8c4625243b53666750bc3eda43a
URL: https://github.com/llvm/llvm-project/commit/e632bb65436ac8c4625243b53666750bc3eda43a
DIFF: https://github.com/llvm/llvm-project/commit/e632bb65436ac8c4625243b53666750bc3eda43a.diff
LOG: [LoongArch] Add codegen support for atomicrmw umin/umax operation on LA64
Furthermore, use `beqz $rd, .BB` instead of `beq $rd, $zero, .BB`.
Differential Revision: https://reviews.llvm.org/D135525
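For reference, a minimal IR sketch of what this change lowers (function names are illustrative, not taken from the patch): on LA64, word- and double-word-sized atomicrmw umax/umin now select the ammax_db.[wd]u / ammin_db.[wd]u instructions directly, while i8/i16 operands go through the masked LL/SC loop produced by expandAtomicMinMaxOp, which now ends in `beqz`.

    define i32 @umax_word(ptr %p, i32 %v) nounwind {
      ; selected directly, e.g. ammax_db.wu $a2, $a1, $a0
      %old = atomicrmw umax ptr %p, i32 %v acquire
      ret i32 %old
    }

    define i8 @umin_byte(ptr %p, i8 %v) nounwind {
      ; expanded into a masked ll.w / bgeu / sc.w loop closed by beqz
      %old = atomicrmw umin ptr %p, i8 %v acquire
      ret i8 %old
    }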
Added:
llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll
Modified:
llvm/include/llvm/IR/IntrinsicsLoongArch.td
llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp
llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/IR/IntrinsicsLoongArch.td b/llvm/include/llvm/IR/IntrinsicsLoongArch.td
index 7b4b0b31e4ff0..9f25b6d87354a 100644
--- a/llvm/include/llvm/IR/IntrinsicsLoongArch.td
+++ b/llvm/include/llvm/IR/IntrinsicsLoongArch.td
@@ -33,4 +33,6 @@ defm int_loongarch_masked_atomicrmw_xchg : MaskedAtomicRMWIntrinsics;
defm int_loongarch_masked_atomicrmw_add : MaskedAtomicRMWIntrinsics;
defm int_loongarch_masked_atomicrmw_sub : MaskedAtomicRMWIntrinsics;
defm int_loongarch_masked_atomicrmw_nand : MaskedAtomicRMWIntrinsics;
+defm int_loongarch_masked_atomicrmw_umax : MaskedAtomicRMWIntrinsics;
+defm int_loongarch_masked_atomicrmw_umin : MaskedAtomicRMWIntrinsics;
} // TargetPrefix = "loongarch"
diff --git a/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp b/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp
index d89b9ff67e79f..59cb615a02401 100644
--- a/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp
@@ -51,6 +51,10 @@ class LoongArchExpandAtomicPseudo : public MachineFunctionPass {
MachineBasicBlock::iterator MBBI, AtomicRMWInst::BinOp,
bool IsMasked, int Width,
MachineBasicBlock::iterator &NextMBBI);
+ bool expandAtomicMinMaxOp(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ AtomicRMWInst::BinOp, bool IsMasked, int Width,
+ MachineBasicBlock::iterator &NextMBBI);
};
char LoongArchExpandAtomicPseudo::ID = 0;
@@ -114,6 +118,12 @@ bool LoongArchExpandAtomicPseudo::expandMI(
case LoongArch::PseudoAtomicLoadXor32:
return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xor, false, 32,
NextMBBI);
+ case LoongArch::PseudoMaskedAtomicLoadUMax32:
+ return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMax, true, 32,
+ NextMBBI);
+ case LoongArch::PseudoMaskedAtomicLoadUMin32:
+ return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMin, true, 32,
+ NextMBBI);
}
return false;
}
@@ -134,7 +144,7 @@ static void doAtomicBinOpExpansion(const LoongArchInstrInfo *TII,
// ll.[w|d] dest, (addr)
// binop scratch, dest, val
// sc.[w|d] scratch, scratch, (addr)
- // beq scratch, zero, loop
+ // beqz scratch, loop
BuildMI(LoopMBB, DL, TII->get(LoongArch::DBAR)).addImm(0);
BuildMI(LoopMBB, DL,
TII->get(Width == 32 ? LoongArch::LL_W : LoongArch::LL_D), DestReg)
@@ -187,9 +197,8 @@ static void doAtomicBinOpExpansion(const LoongArchInstrInfo *TII,
.addReg(ScratchReg)
.addReg(AddrReg)
.addImm(0);
- BuildMI(LoopMBB, DL, TII->get(LoongArch::BEQ))
+ BuildMI(LoopMBB, DL, TII->get(LoongArch::BEQZ))
.addReg(ScratchReg)
- .addReg(LoongArch::R0)
.addMBB(LoopMBB);
}
@@ -232,7 +241,7 @@ static void doMaskedAtomicBinOpExpansion(
// and scratch, scratch, masktargetdata
// xor scratch, destreg, scratch
// sc.w scratch, scratch, (alignedaddr)
- // beq scratch, zero, loop
+ // beqz scratch, loop
BuildMI(LoopMBB, DL, TII->get(LoongArch::DBAR)).addImm(0);
BuildMI(LoopMBB, DL, TII->get(LoongArch::LL_W), DestReg)
.addReg(AddrReg)
@@ -272,9 +281,8 @@ static void doMaskedAtomicBinOpExpansion(
.addReg(ScratchReg)
.addReg(AddrReg)
.addImm(0);
- BuildMI(LoopMBB, DL, TII->get(LoongArch::BEQ))
+ BuildMI(LoopMBB, DL, TII->get(LoongArch::BEQZ))
.addReg(ScratchReg)
- .addReg(LoongArch::R0)
.addMBB(LoopMBB);
}
@@ -316,6 +324,114 @@ bool LoongArchExpandAtomicPseudo::expandAtomicBinOp(
return true;
}
+bool LoongArchExpandAtomicPseudo::expandAtomicMinMaxOp(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width,
+ MachineBasicBlock::iterator &NextMBBI) {
+ assert(IsMasked == true &&
+ "Should only need to expand masked atomic max/min");
+ assert(Width == 32 && "Should never need to expand masked 64-bit operations");
+
+ MachineInstr &MI = *MBBI;
+ DebugLoc DL = MI.getDebugLoc();
+ MachineFunction *MF = MBB.getParent();
+ auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+ auto LoopIfBodyMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+ auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+ auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+
+ // Insert new MBBs.
+ MF->insert(++MBB.getIterator(), LoopHeadMBB);
+ MF->insert(++LoopHeadMBB->getIterator(), LoopIfBodyMBB);
+ MF->insert(++LoopIfBodyMBB->getIterator(), LoopTailMBB);
+ MF->insert(++LoopTailMBB->getIterator(), DoneMBB);
+
+ // Set up successors and transfer remaining instructions to DoneMBB.
+ LoopHeadMBB->addSuccessor(LoopIfBodyMBB);
+ LoopHeadMBB->addSuccessor(LoopTailMBB);
+ LoopIfBodyMBB->addSuccessor(LoopTailMBB);
+ LoopTailMBB->addSuccessor(LoopHeadMBB);
+ LoopTailMBB->addSuccessor(DoneMBB);
+ DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
+ DoneMBB->transferSuccessors(&MBB);
+ MBB.addSuccessor(LoopHeadMBB);
+
+ Register DestReg = MI.getOperand(0).getReg();
+ Register Scratch1Reg = MI.getOperand(1).getReg();
+ Register Scratch2Reg = MI.getOperand(2).getReg();
+ Register AddrReg = MI.getOperand(3).getReg();
+ Register IncrReg = MI.getOperand(4).getReg();
+ Register MaskReg = MI.getOperand(5).getReg();
+
+ //
+ // .loophead:
+ // dbar 0
+ // ll.w destreg, (alignedaddr)
+ // and scratch2, destreg, mask
+ // move scratch1, destreg
+ BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::DBAR)).addImm(0);
+ BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::LL_W), DestReg)
+ .addReg(AddrReg)
+ .addImm(0);
+ BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::AND), Scratch2Reg)
+ .addReg(DestReg)
+ .addReg(MaskReg);
+ BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::OR), Scratch1Reg)
+ .addReg(DestReg)
+ .addReg(LoongArch::R0);
+
+ switch (BinOp) {
+ default:
+ llvm_unreachable("Unexpected AtomicRMW BinOp");
+ // bgeu scratch2, incr, .looptail
+ case AtomicRMWInst::UMax:
+ BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::BGEU))
+ .addReg(Scratch2Reg)
+ .addReg(IncrReg)
+ .addMBB(LoopTailMBB);
+ break;
+ // bgeu incr, scratch2, .looptail
+ case AtomicRMWInst::UMin:
+ BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::BGEU))
+ .addReg(IncrReg)
+ .addReg(Scratch2Reg)
+ .addMBB(LoopTailMBB);
+ break;
+ // TODO: support other AtomicRMWInst.
+ }
+
+ // .loopifbody:
+ // xor scratch1, destreg, incr
+ // and scratch1, scratch1, mask
+ // xor scratch1, destreg, scratch1
+ insertMaskedMerge(TII, DL, LoopIfBodyMBB, Scratch1Reg, DestReg, IncrReg,
+ MaskReg, Scratch1Reg);
+
+ // .looptail:
+ // sc.w scratch1, scratch1, (addr)
+ // beqz scratch1, loop
+ // dbar 0x700
+ BuildMI(LoopTailMBB, DL, TII->get(LoongArch::SC_W), Scratch1Reg)
+ .addReg(Scratch1Reg)
+ .addReg(AddrReg)
+ .addImm(0);
+ BuildMI(LoopTailMBB, DL, TII->get(LoongArch::BEQZ))
+ .addReg(Scratch1Reg)
+ .addMBB(LoopHeadMBB);
+ BuildMI(LoopTailMBB, DL, TII->get(LoongArch::DBAR)).addImm(0x700);
+
+ NextMBBI = MBB.end();
+ MI.eraseFromParent();
+
+ LivePhysRegs LiveRegs;
+ computeAndAddLiveIns(LiveRegs, *LoopHeadMBB);
+ computeAndAddLiveIns(LiveRegs, *LoopIfBodyMBB);
+ computeAndAddLiveIns(LiveRegs, *LoopTailMBB);
+ computeAndAddLiveIns(LiveRegs, *DoneMBB);
+
+ return true;
+}
+
} // end namespace
INITIALIZE_PASS(LoongArchExpandAtomicPseudo, "loongarch-expand-atomic-pseudo",
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 51ab8696391ee..2dd677898a882 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -2007,6 +2007,10 @@ getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen,
return Intrinsic::loongarch_masked_atomicrmw_sub_i64;
case AtomicRMWInst::Nand:
return Intrinsic::loongarch_masked_atomicrmw_nand_i64;
+ case AtomicRMWInst::UMax:
+ return Intrinsic::loongarch_masked_atomicrmw_umax_i64;
+ case AtomicRMWInst::UMin:
+ return Intrinsic::loongarch_masked_atomicrmw_umin_i64;
// TODO: support other AtomicRMWInst.
}
}
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
index d52968bccc818..880070846429a 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
@@ -1158,6 +1158,20 @@ def PseudoAtomicLoadAnd32 : PseudoAM;
def PseudoAtomicLoadOr32 : PseudoAM;
def PseudoAtomicLoadXor32 : PseudoAM;
+
+class PseudoMaskedAMUMinUMax
+ : Pseudo<(outs GPR:$res, GPR:$scratch1, GPR:$scratch2),
+ (ins GPR:$addr, GPR:$incr, GPR:$mask, grlenimm:$ordering), []> {
+ let Constraints = "@earlyclobber $res,@earlyclobber $scratch1,"
+ "@earlyclobber $scratch2";
+ let mayLoad = 1;
+ let mayStore = 1;
+ let hasSideEffects = 0;
+}
+
+def PseudoMaskedAtomicLoadUMax32 : PseudoMaskedAMUMinUMax;
+def PseudoMaskedAtomicLoadUMin32 : PseudoMaskedAMUMinUMax;
+
class AtomicPat<Intrinsic intrin, Pseudo AMInst>
: Pat<(intrin GPR:$addr, GPR:$incr, GPR:$mask, timm:$ordering),
(AMInst GPR:$addr, GPR:$incr, GPR:$mask, timm:$ordering)>;
@@ -1197,6 +1211,20 @@ def : Pat<(atomic_load_xor_32 GPR:$rj, GPR:$rk),
(AMXOR_DB_W GPR:$rk, GPR:$rj)>;
def : Pat<(atomic_load_xor_64 GPR:$rj, GPR:$rk),
(AMXOR_DB_D GPR:$rk, GPR:$rj)>;
+
+def : Pat<(atomic_load_umin_32 GPR:$rj, GPR:$rk),
+ (AMMIN_DB_WU GPR:$rk, GPR:$rj)>;
+def : Pat<(atomic_load_umin_64 GPR:$rj, GPR:$rk),
+ (AMMIN_DB_DU GPR:$rk, GPR:$rj)>;
+def : Pat<(atomic_load_umax_32 GPR:$rj, GPR:$rk),
+ (AMMAX_DB_WU GPR:$rk, GPR:$rj)>;
+def : Pat<(atomic_load_umax_64 GPR:$rj, GPR:$rk),
+ (AMMAX_DB_DU GPR:$rk, GPR:$rj)>;
+
+def : AtomicPat<int_loongarch_masked_atomicrmw_umax_i64,
+ PseudoMaskedAtomicLoadUMax32>;
+def : AtomicPat<int_loongarch_masked_atomicrmw_umin_i64,
+ PseudoMaskedAtomicLoadUMin32>;
} // Predicates = [IsLA64]
def : Pat<(atomic_load_nand_32 GPR:$rj, GPR:$rk),
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll
new file mode 100644
index 0000000000000..fcbed0edb3d16
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll
@@ -0,0 +1,178 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64
+
+;; TODO: Testing for LA32 architecture will be added later
+
+define i8 @atomicrmw_umax_i8_acquire(ptr %a, i8 %b) nounwind {
+; LA64-LABEL: atomicrmw_umax_i8_acquire:
+; LA64: # %bb.0:
+; LA64-NEXT: addi.w $a2, $zero, -4
+; LA64-NEXT: and $a2, $a0, $a2
+; LA64-NEXT: slli.d $a0, $a0, 3
+; LA64-NEXT: ori $a3, $zero, 255
+; LA64-NEXT: sll.w $a3, $a3, $a0
+; LA64-NEXT: addi.w $a3, $a3, 0
+; LA64-NEXT: andi $a1, $a1, 255
+; LA64-NEXT: sll.w $a1, $a1, $a0
+; LA64-NEXT: addi.w $a1, $a1, 0
+; LA64-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: dbar 0
+; LA64-NEXT: ll.w $a4, $a2, 0
+; LA64-NEXT: and $a6, $a4, $a3
+; LA64-NEXT: move $a5, $a4
+; LA64-NEXT: bgeu $a6, $a1, .LBB0_3
+; LA64-NEXT: # %bb.2: # in Loop: Header=BB0_1 Depth=1
+; LA64-NEXT: xor $a5, $a4, $a1
+; LA64-NEXT: and $a5, $a5, $a3
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: .LBB0_3: # in Loop: Header=BB0_1 Depth=1
+; LA64-NEXT: sc.w $a5, $a2, 0
+; LA64-NEXT: beqz $a5, .LBB0_1
+; LA64-NEXT: dbar 1792
+; LA64-NEXT: # %bb.4:
+; LA64-NEXT: srl.w $a0, $a4, $a0
+; LA64-NEXT: ret
+ %1 = atomicrmw umax ptr %a, i8 %b acquire
+ ret i8 %1
+}
+
+define i16 @atomicrmw_umax_i16_acquire(ptr %a, i16 %b) nounwind {
+; LA64-LABEL: atomicrmw_umax_i16_acquire:
+; LA64: # %bb.0:
+; LA64-NEXT: addi.w $a2, $zero, -4
+; LA64-NEXT: and $a2, $a0, $a2
+; LA64-NEXT: slli.d $a0, $a0, 3
+; LA64-NEXT: lu12i.w $a3, 15
+; LA64-NEXT: ori $a3, $a3, 4095
+; LA64-NEXT: sll.w $a3, $a3, $a0
+; LA64-NEXT: addi.w $a3, $a3, 0
+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT: sll.w $a1, $a1, $a0
+; LA64-NEXT: addi.w $a1, $a1, 0
+; LA64-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: dbar 0
+; LA64-NEXT: ll.w $a4, $a2, 0
+; LA64-NEXT: and $a6, $a4, $a3
+; LA64-NEXT: move $a5, $a4
+; LA64-NEXT: bgeu $a6, $a1, .LBB1_3
+; LA64-NEXT: # %bb.2: # in Loop: Header=BB1_1 Depth=1
+; LA64-NEXT: xor $a5, $a4, $a1
+; LA64-NEXT: and $a5, $a5, $a3
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: .LBB1_3: # in Loop: Header=BB1_1 Depth=1
+; LA64-NEXT: sc.w $a5, $a2, 0
+; LA64-NEXT: beqz $a5, .LBB1_1
+; LA64-NEXT: dbar 1792
+; LA64-NEXT: # %bb.4:
+; LA64-NEXT: srl.w $a0, $a4, $a0
+; LA64-NEXT: ret
+ %1 = atomicrmw umax ptr %a, i16 %b acquire
+ ret i16 %1
+}
+
+define i32 @atomicrmw_umax_i32_acquire(ptr %a, i32 %b) nounwind {
+; LA64-LABEL: atomicrmw_umax_i32_acquire:
+; LA64: # %bb.0:
+; LA64-NEXT: ammax_db.wu $a2, $a1, $a0
+; LA64-NEXT: move $a0, $a2
+; LA64-NEXT: ret
+ %1 = atomicrmw umax ptr %a, i32 %b acquire
+ ret i32 %1
+}
+
+define i64 @atomicrmw_umax_i64_acquire(ptr %a, i64 %b) nounwind {
+; LA64-LABEL: atomicrmw_umax_i64_acquire:
+; LA64: # %bb.0:
+; LA64-NEXT: ammax_db.du $a2, $a1, $a0
+; LA64-NEXT: move $a0, $a2
+; LA64-NEXT: ret
+ %1 = atomicrmw umax ptr %a, i64 %b acquire
+ ret i64 %1
+}
+
+define i8 @atomicrmw_umin_i8_acquire(ptr %a, i8 %b) nounwind {
+; LA64-LABEL: atomicrmw_umin_i8_acquire:
+; LA64: # %bb.0:
+; LA64-NEXT: addi.w $a2, $zero, -4
+; LA64-NEXT: and $a2, $a0, $a2
+; LA64-NEXT: slli.d $a0, $a0, 3
+; LA64-NEXT: ori $a3, $zero, 255
+; LA64-NEXT: sll.w $a3, $a3, $a0
+; LA64-NEXT: addi.w $a3, $a3, 0
+; LA64-NEXT: andi $a1, $a1, 255
+; LA64-NEXT: sll.w $a1, $a1, $a0
+; LA64-NEXT: addi.w $a1, $a1, 0
+; LA64-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: dbar 0
+; LA64-NEXT: ll.w $a4, $a2, 0
+; LA64-NEXT: and $a6, $a4, $a3
+; LA64-NEXT: move $a5, $a4
+; LA64-NEXT: bgeu $a1, $a6, .LBB4_3
+; LA64-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1
+; LA64-NEXT: xor $a5, $a4, $a1
+; LA64-NEXT: and $a5, $a5, $a3
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: .LBB4_3: # in Loop: Header=BB4_1 Depth=1
+; LA64-NEXT: sc.w $a5, $a2, 0
+; LA64-NEXT: beqz $a5, .LBB4_1
+; LA64-NEXT: dbar 1792
+; LA64-NEXT: # %bb.4:
+; LA64-NEXT: srl.w $a0, $a4, $a0
+; LA64-NEXT: ret
+ %1 = atomicrmw umin ptr %a, i8 %b acquire
+ ret i8 %1
+}
+
+define i16 @atomicrmw_umin_i16_acquire(ptr %a, i16 %b) nounwind {
+; LA64-LABEL: atomicrmw_umin_i16_acquire:
+; LA64: # %bb.0:
+; LA64-NEXT: addi.w $a2, $zero, -4
+; LA64-NEXT: and $a2, $a0, $a2
+; LA64-NEXT: slli.d $a0, $a0, 3
+; LA64-NEXT: lu12i.w $a3, 15
+; LA64-NEXT: ori $a3, $a3, 4095
+; LA64-NEXT: sll.w $a3, $a3, $a0
+; LA64-NEXT: addi.w $a3, $a3, 0
+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT: sll.w $a1, $a1, $a0
+; LA64-NEXT: addi.w $a1, $a1, 0
+; LA64-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: dbar 0
+; LA64-NEXT: ll.w $a4, $a2, 0
+; LA64-NEXT: and $a6, $a4, $a3
+; LA64-NEXT: move $a5, $a4
+; LA64-NEXT: bgeu $a1, $a6, .LBB5_3
+; LA64-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1
+; LA64-NEXT: xor $a5, $a4, $a1
+; LA64-NEXT: and $a5, $a5, $a3
+; LA64-NEXT: xor $a5, $a4, $a5
+; LA64-NEXT: .LBB5_3: # in Loop: Header=BB5_1 Depth=1
+; LA64-NEXT: sc.w $a5, $a2, 0
+; LA64-NEXT: beqz $a5, .LBB5_1
+; LA64-NEXT: dbar 1792
+; LA64-NEXT: # %bb.4:
+; LA64-NEXT: srl.w $a0, $a4, $a0
+; LA64-NEXT: ret
+ %1 = atomicrmw umin ptr %a, i16 %b acquire
+ ret i16 %1
+}
+
+define i32 @atomicrmw_umin_i32_acquire(ptr %a, i32 %b) nounwind {
+; LA64-LABEL: atomicrmw_umin_i32_acquire:
+; LA64: # %bb.0:
+; LA64-NEXT: ammin_db.wu $a2, $a1, $a0
+; LA64-NEXT: move $a0, $a2
+; LA64-NEXT: ret
+ %1 = atomicrmw umin ptr %a, i32 %b acquire
+ ret i32 %1
+}
+
+define i64 @atomicrmw_umin_i64_acquire(ptr %a, i64 %b) nounwind {
+; LA64-LABEL: atomicrmw_umin_i64_acquire:
+; LA64: # %bb.0:
+; LA64-NEXT: ammin_db.du $a2, $a1, $a0
+; LA64-NEXT: move $a0, $a2
+; LA64-NEXT: ret
+ %1 = atomicrmw umin ptr %a, i64 %b acquire
+ ret i64 %1
+}
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll
index 5f9695cc792b6..9761a402f9433 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw.ll
@@ -20,7 +20,7 @@ define i8 @atomicrmw_xchg_i8_acquire(ptr %a, i8 %b) nounwind {
; LA32-NEXT: and $a5, $a5, $a3
; LA32-NEXT: xor $a5, $a4, $a5
; LA32-NEXT: sc.w $a5, $a2, 0
-; LA32-NEXT: beq $a5, $zero, .LBB0_1
+; LA32-NEXT: beqz $a5, .LBB0_1
; LA32-NEXT: # %bb.2:
; LA32-NEXT: srl.w $a0, $a4, $a0
; LA32-NEXT: ret
@@ -44,7 +44,7 @@ define i8 @atomicrmw_xchg_i8_acquire(ptr %a, i8 %b) nounwind {
; LA64-NEXT: and $a5, $a5, $a3
; LA64-NEXT: xor $a5, $a4, $a5
; LA64-NEXT: sc.w $a5, $a2, 0
-; LA64-NEXT: beq $a5, $zero, .LBB0_1
+; LA64-NEXT: beqz $a5, .LBB0_1
; LA64-NEXT: # %bb.2:
; LA64-NEXT: srl.w $a0, $a4, $a0
; LA64-NEXT: ret
@@ -71,7 +71,7 @@ define i16 @atomicrmw_xchg_i16_acquire(ptr %a, i16 %b) nounwind {
; LA32-NEXT: and $a5, $a5, $a3
; LA32-NEXT: xor $a5, $a4, $a5
; LA32-NEXT: sc.w $a5, $a2, 0
-; LA32-NEXT: beq $a5, $zero, .LBB1_1
+; LA32-NEXT: beqz $a5, .LBB1_1
; LA32-NEXT: # %bb.2:
; LA32-NEXT: srl.w $a0, $a4, $a0
; LA32-NEXT: ret
@@ -96,7 +96,7 @@ define i16 @atomicrmw_xchg_i16_acquire(ptr %a, i16 %b) nounwind {
; LA64-NEXT: and $a5, $a5, $a3
; LA64-NEXT: xor $a5, $a4, $a5
; LA64-NEXT: sc.w $a5, $a2, 0
-; LA64-NEXT: beq $a5, $zero, .LBB1_1
+; LA64-NEXT: beqz $a5, .LBB1_1
; LA64-NEXT: # %bb.2:
; LA64-NEXT: srl.w $a0, $a4, $a0
; LA64-NEXT: ret
@@ -112,7 +112,7 @@ define i32 @atomicrmw_xchg_i32_acquire(ptr %a, i32 %b) nounwind {
; LA32-NEXT: ll.w $a2, $a1, 0
; LA32-NEXT: move $a3, $a0
; LA32-NEXT: sc.w $a3, $a1, 0
-; LA32-NEXT: beq $a3, $zero, .LBB2_1
+; LA32-NEXT: beqz $a3, .LBB2_1
; LA32-NEXT: # %bb.2:
; LA32-NEXT: move $a0, $a2
; LA32-NEXT: ret
@@ -164,7 +164,7 @@ define i8 @atomicrmw_add_i8_acquire(ptr %a, i8 %b) nounwind {
; LA32-NEXT: and $a5, $a5, $a3
; LA32-NEXT: xor $a5, $a4, $a5
; LA32-NEXT: sc.w $a5, $a2, 0
-; LA32-NEXT: beq $a5, $zero, .LBB4_1
+; LA32-NEXT: beqz $a5, .LBB4_1
; LA32-NEXT: # %bb.2:
; LA32-NEXT: srl.w $a0, $a4, $a0
; LA32-NEXT: ret
@@ -188,7 +188,7 @@ define i8 @atomicrmw_add_i8_acquire(ptr %a, i8 %b) nounwind {
; LA64-NEXT: and $a5, $a5, $a3
; LA64-NEXT: xor $a5, $a4, $a5
; LA64-NEXT: sc.w $a5, $a2, 0
-; LA64-NEXT: beq $a5, $zero, .LBB4_1
+; LA64-NEXT: beqz $a5, .LBB4_1
; LA64-NEXT: # %bb.2:
; LA64-NEXT: srl.w $a0, $a4, $a0
; LA64-NEXT: ret
@@ -215,7 +215,7 @@ define i16 @atomicrmw_add_i16_acquire(ptr %a, i16 %b) nounwind {
; LA32-NEXT: and $a5, $a5, $a3
; LA32-NEXT: xor $a5, $a4, $a5
; LA32-NEXT: sc.w $a5, $a2, 0
-; LA32-NEXT: beq $a5, $zero, .LBB5_1
+; LA32-NEXT: beqz $a5, .LBB5_1
; LA32-NEXT: # %bb.2:
; LA32-NEXT: srl.w $a0, $a4, $a0
; LA32-NEXT: ret
@@ -240,7 +240,7 @@ define i16 @atomicrmw_add_i16_acquire(ptr %a, i16 %b) nounwind {
; LA64-NEXT: and $a5, $a5, $a3
; LA64-NEXT: xor $a5, $a4, $a5
; LA64-NEXT: sc.w $a5, $a2, 0
-; LA64-NEXT: beq $a5, $zero, .LBB5_1
+; LA64-NEXT: beqz $a5, .LBB5_1
; LA64-NEXT: # %bb.2:
; LA64-NEXT: srl.w $a0, $a4, $a0
; LA64-NEXT: ret
@@ -256,7 +256,7 @@ define i32 @atomicrmw_add_i32_acquire(ptr %a, i32 %b) nounwind {
; LA32-NEXT: ll.w $a2, $a1, 0
; LA32-NEXT: add.w $a3, $a2, $a0
; LA32-NEXT: sc.w $a3, $a1, 0
-; LA32-NEXT: beq $a3, $zero, .LBB6_1
+; LA32-NEXT: beqz $a3, .LBB6_1
; LA32-NEXT: # %bb.2:
; LA32-NEXT: move $a0, $a2
; LA32-NEXT: ret
@@ -308,7 +308,7 @@ define i8 @atomicrmw_sub_i8_acquire(ptr %a, i8 %b) nounwind {
; LA32-NEXT: and $a5, $a5, $a3
; LA32-NEXT: xor $a5, $a4, $a5
; LA32-NEXT: sc.w $a5, $a2, 0
-; LA32-NEXT: beq $a5, $zero, .LBB8_1
+; LA32-NEXT: beqz $a5, .LBB8_1
; LA32-NEXT: # %bb.2:
; LA32-NEXT: srl.w $a0, $a4, $a0
; LA32-NEXT: ret
@@ -332,7 +332,7 @@ define i8 @atomicrmw_sub_i8_acquire(ptr %a, i8 %b) nounwind {
; LA64-NEXT: and $a5, $a5, $a3
; LA64-NEXT: xor $a5, $a4, $a5
; LA64-NEXT: sc.w $a5, $a2, 0
-; LA64-NEXT: beq $a5, $zero, .LBB8_1
+; LA64-NEXT: beqz $a5, .LBB8_1
; LA64-NEXT: # %bb.2:
; LA64-NEXT: srl.w $a0, $a4, $a0
; LA64-NEXT: ret
@@ -359,7 +359,7 @@ define i16 @atomicrmw_sub_i16_acquire(ptr %a, i16 %b) nounwind {
; LA32-NEXT: and $a5, $a5, $a3
; LA32-NEXT: xor $a5, $a4, $a5
; LA32-NEXT: sc.w $a5, $a2, 0
-; LA32-NEXT: beq $a5, $zero, .LBB9_1
+; LA32-NEXT: beqz $a5, .LBB9_1
; LA32-NEXT: # %bb.2:
; LA32-NEXT: srl.w $a0, $a4, $a0
; LA32-NEXT: ret
@@ -384,7 +384,7 @@ define i16 @atomicrmw_sub_i16_acquire(ptr %a, i16 %b) nounwind {
; LA64-NEXT: and $a5, $a5, $a3
; LA64-NEXT: xor $a5, $a4, $a5
; LA64-NEXT: sc.w $a5, $a2, 0
-; LA64-NEXT: beq $a5, $zero, .LBB9_1
+; LA64-NEXT: beqz $a5, .LBB9_1
; LA64-NEXT: # %bb.2:
; LA64-NEXT: srl.w $a0, $a4, $a0
; LA64-NEXT: ret
@@ -400,7 +400,7 @@ define i32 @atomicrmw_sub_i32_acquire(ptr %a, i32 %b) nounwind {
; LA32-NEXT: ll.w $a2, $a1, 0
; LA32-NEXT: sub.w $a3, $a2, $a0
; LA32-NEXT: sc.w $a3, $a1, 0
-; LA32-NEXT: beq $a3, $zero, .LBB10_1
+; LA32-NEXT: beqz $a3, .LBB10_1
; LA32-NEXT: # %bb.2:
; LA32-NEXT: move $a0, $a2
; LA32-NEXT: ret
@@ -455,7 +455,7 @@ define i8 @atomicrmw_nand_i8_acquire(ptr %a, i8 %b) nounwind {
; LA32-NEXT: and $a5, $a5, $a3
; LA32-NEXT: xor $a5, $a4, $a5
; LA32-NEXT: sc.w $a5, $a2, 0
-; LA32-NEXT: beq $a5, $zero, .LBB12_1
+; LA32-NEXT: beqz $a5, .LBB12_1
; LA32-NEXT: # %bb.2:
; LA32-NEXT: srl.w $a0, $a4, $a0
; LA32-NEXT: ret
@@ -480,7 +480,7 @@ define i8 @atomicrmw_nand_i8_acquire(ptr %a, i8 %b) nounwind {
; LA64-NEXT: and $a5, $a5, $a3
; LA64-NEXT: xor $a5, $a4, $a5
; LA64-NEXT: sc.w $a5, $a2, 0
-; LA64-NEXT: beq $a5, $zero, .LBB12_1
+; LA64-NEXT: beqz $a5, .LBB12_1
; LA64-NEXT: # %bb.2:
; LA64-NEXT: srl.w $a0, $a4, $a0
; LA64-NEXT: ret
@@ -508,7 +508,7 @@ define i16 @atomicrmw_nand_i16_acquire(ptr %a, i16 %b) nounwind {
; LA32-NEXT: and $a5, $a5, $a3
; LA32-NEXT: xor $a5, $a4, $a5
; LA32-NEXT: sc.w $a5, $a2, 0
-; LA32-NEXT: beq $a5, $zero, .LBB13_1
+; LA32-NEXT: beqz $a5, .LBB13_1
; LA32-NEXT: # %bb.2:
; LA32-NEXT: srl.w $a0, $a4, $a0
; LA32-NEXT: ret
@@ -534,7 +534,7 @@ define i16 @atomicrmw_nand_i16_acquire(ptr %a, i16 %b) nounwind {
; LA64-NEXT: and $a5, $a5, $a3
; LA64-NEXT: xor $a5, $a4, $a5
; LA64-NEXT: sc.w $a5, $a2, 0
-; LA64-NEXT: beq $a5, $zero, .LBB13_1
+; LA64-NEXT: beqz $a5, .LBB13_1
; LA64-NEXT: # %bb.2:
; LA64-NEXT: srl.w $a0, $a4, $a0
; LA64-NEXT: ret
@@ -551,7 +551,7 @@ define i32 @atomicrmw_nand_i32_acquire(ptr %a, i32 %b) nounwind {
; LA32-NEXT: and $a3, $a2, $a0
; LA32-NEXT: xori $a3, $a3, -1
; LA32-NEXT: sc.w $a3, $a1, 0
-; LA32-NEXT: beq $a3, $zero, .LBB14_1
+; LA32-NEXT: beqz $a3, .LBB14_1
; LA32-NEXT: # %bb.2:
; LA32-NEXT: move $a0, $a2
; LA32-NEXT: ret
@@ -564,7 +564,7 @@ define i32 @atomicrmw_nand_i32_acquire(ptr %a, i32 %b) nounwind {
; LA64-NEXT: and $a3, $a2, $a0
; LA64-NEXT: xori $a3, $a3, -1
; LA64-NEXT: sc.w $a3, $a1, 0
-; LA64-NEXT: beq $a3, $zero, .LBB14_1
+; LA64-NEXT: beqz $a3, .LBB14_1
; LA64-NEXT: # %bb.2:
; LA64-NEXT: move $a0, $a2
; LA64-NEXT: ret
@@ -591,7 +591,7 @@ define i64 @atomicrmw_nand_i64_acquire(ptr %a, i64 %b) nounwind {
; LA64-NEXT: and $a3, $a2, $a0
; LA64-NEXT: xori $a3, $a3, -1
; LA64-NEXT: sc.d $a3, $a1, 0
-; LA64-NEXT: beq $a3, $zero, .LBB15_1
+; LA64-NEXT: beqz $a3, .LBB15_1
; LA64-NEXT: # %bb.2:
; LA64-NEXT: move $a0, $a2
; LA64-NEXT: ret
@@ -615,7 +615,7 @@ define i8 @atomicrmw_and_i8_acquire(ptr %a, i8 %b) nounwind {
; LA32-NEXT: ll.w $a3, $a1, 0
; LA32-NEXT: and $a4, $a3, $a0
; LA32-NEXT: sc.w $a4, $a1, 0
-; LA32-NEXT: beq $a4, $zero, .LBB16_1
+; LA32-NEXT: beqz $a4, .LBB16_1
; LA32-NEXT: # %bb.2:
; LA32-NEXT: srl.w $a0, $a3, $a2
; LA32-NEXT: ret
@@ -654,7 +654,7 @@ define i16 @atomicrmw_and_i16_acquire(ptr %a, i16 %b) nounwind {
; LA32-NEXT: ll.w $a2, $a1, 0
; LA32-NEXT: and $a4, $a2, $a0
; LA32-NEXT: sc.w $a4, $a1, 0
-; LA32-NEXT: beq $a4, $zero, .LBB17_1
+; LA32-NEXT: beqz $a4, .LBB17_1
; LA32-NEXT: # %bb.2:
; LA32-NEXT: srl.w $a0, $a2, $a3
; LA32-NEXT: ret
@@ -685,7 +685,7 @@ define i32 @atomicrmw_and_i32_acquire(ptr %a, i32 %b) nounwind {
; LA32-NEXT: ll.w $a2, $a1, 0
; LA32-NEXT: and $a3, $a2, $a0
; LA32-NEXT: sc.w $a3, $a1, 0
-; LA32-NEXT: beq $a3, $zero, .LBB18_1
+; LA32-NEXT: beqz $a3, .LBB18_1
; LA32-NEXT: # %bb.2:
; LA32-NEXT: move $a0, $a2
; LA32-NEXT: ret
@@ -732,7 +732,7 @@ define i8 @atomicrmw_or_i8_acquire(ptr %a, i8 %b) nounwind {
; LA32-NEXT: ll.w $a3, $a1, 0
; LA32-NEXT: or $a4, $a3, $a2
; LA32-NEXT: sc.w $a4, $a1, 0
-; LA32-NEXT: beq $a4, $zero, .LBB20_1
+; LA32-NEXT: beqz $a4, .LBB20_1
; LA32-NEXT: # %bb.2:
; LA32-NEXT: srl.w $a0, $a3, $a0
; LA32-NEXT: ret
@@ -764,7 +764,7 @@ define i16 @atomicrmw_or_i16_acquire(ptr %a, i16 %b) nounwind {
; LA32-NEXT: ll.w $a3, $a1, 0
; LA32-NEXT: or $a4, $a3, $a2
; LA32-NEXT: sc.w $a4, $a1, 0
-; LA32-NEXT: beq $a4, $zero, .LBB21_1
+; LA32-NEXT: beqz $a4, .LBB21_1
; LA32-NEXT: # %bb.2:
; LA32-NEXT: srl.w $a0, $a3, $a0
; LA32-NEXT: ret
@@ -791,7 +791,7 @@ define i32 @atomicrmw_or_i32_acquire(ptr %a, i32 %b) nounwind {
; LA32-NEXT: ll.w $a2, $a1, 0
; LA32-NEXT: or $a3, $a2, $a0
; LA32-NEXT: sc.w $a3, $a1, 0
-; LA32-NEXT: beq $a3, $zero, .LBB22_1
+; LA32-NEXT: beqz $a3, .LBB22_1
; LA32-NEXT: # %bb.2:
; LA32-NEXT: move $a0, $a2
; LA32-NEXT: ret
@@ -838,7 +838,7 @@ define i8 @atomicrmw_xor_i8_acquire(ptr %a, i8 %b) nounwind {
; LA32-NEXT: ll.w $a3, $a1, 0
; LA32-NEXT: xor $a4, $a3, $a2
; LA32-NEXT: sc.w $a4, $a1, 0
-; LA32-NEXT: beq $a4, $zero, .LBB24_1
+; LA32-NEXT: beqz $a4, .LBB24_1
; LA32-NEXT: # %bb.2:
; LA32-NEXT: srl.w $a0, $a3, $a0
; LA32-NEXT: ret
@@ -870,7 +870,7 @@ define i16 @atomicrmw_xor_i16_acquire(ptr %a, i16 %b) nounwind {
; LA32-NEXT: ll.w $a3, $a1, 0
; LA32-NEXT: xor $a4, $a3, $a2
; LA32-NEXT: sc.w $a4, $a1, 0
-; LA32-NEXT: beq $a4, $zero, .LBB25_1
+; LA32-NEXT: beqz $a4, .LBB25_1
; LA32-NEXT: # %bb.2:
; LA32-NEXT: srl.w $a0, $a3, $a0
; LA32-NEXT: ret
@@ -897,7 +897,7 @@ define i32 @atomicrmw_xor_i32_acquire(ptr %a, i32 %b) nounwind {
; LA32-NEXT: ll.w $a2, $a1, 0
; LA32-NEXT: xor $a3, $a2, $a0
; LA32-NEXT: sc.w $a3, $a1, 0
-; LA32-NEXT: beq $a3, $zero, .LBB26_1
+; LA32-NEXT: beqz $a3, .LBB26_1
; LA32-NEXT: # %bb.2:
; LA32-NEXT: move $a0, $a2
; LA32-NEXT: ret