[llvm] [LoongArch] Add codegen support for atomic-ops on LA32 (PR #141557)
via llvm-commits
llvm-commits at lists.llvm.org
Tue May 27 00:47:16 PDT 2025
llvmbot wrote:
@llvm/pr-subscribers-llvm-ir
Author: hev (heiher)
Changes:
This patch adds codegen support for the atomic operations `cmpxchg`, `max`, `min`, `umax`, and `umin` on the LA32 target.
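For context, here is a minimal IR sketch of the kinds of operations covered (illustrative function names, not copied from the patch's test files); with this change they receive native LL/SC-based lowering on loongarch32, as exercised by the updated atomicrmw-minmax.ll test:

```llvm
; Illustrative IR only -- not taken verbatim from the patch's tests.
define i8 @umax_i8(ptr %p, i8 %v) {
  ; Sub-word min/max atomicrmw is expanded to a masked LL/SC loop on LA32.
  %old = atomicrmw umax ptr %p, i8 %v acquire
  ret i8 %old
}

define i32 @max_i32(ptr %p, i32 %v) {
  ; Word-sized min/max now selects the new PseudoAtomicLoad{Max,Min,UMax,UMin}32
  ; pseudos, which LoongArchExpandAtomicPseudo expands into an unmasked LL/SC loop.
  %old = atomicrmw max ptr %p, i32 %v acquire
  ret i32 %old
}
```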
---
Patch is 132.89 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/141557.diff
5 Files Affected:
- (modified) llvm/include/llvm/IR/IntrinsicsLoongArch.td (+4-3)
- (modified) llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp (+56-33)
- (modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp (+18-7)
- (modified) llvm/lib/Target/LoongArch/LoongArchInstrInfo.td (+40-17)
- (modified) llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll (+2622)
``````````diff
diff --git a/llvm/include/llvm/IR/IntrinsicsLoongArch.td b/llvm/include/llvm/IR/IntrinsicsLoongArch.td
index 4621f1689b46e..f5e3d412666a2 100644
--- a/llvm/include/llvm/IR/IntrinsicsLoongArch.td
+++ b/llvm/include/llvm/IR/IntrinsicsLoongArch.td
@@ -25,12 +25,13 @@ class MaskedAtomicRMW<LLVMType itype>
multiclass MaskedAtomicRMWIntrinsics {
// i32 @llvm.<name>.i32.<p>(any*, i32, i32, i32 imm);
def _i32 : MaskedAtomicRMW<llvm_i32_ty>;
- // i64 @llvm.<name>.i32.<p>(any*, i64, i64, i64 imm);
+ // i64 @llvm.<name>.i64.<p>(any*, i64, i64, i64 imm);
def _i64 : MaskedAtomicRMW<llvm_i64_ty>;
}
multiclass MaskedAtomicRMWFiveOpIntrinsics {
- // TODO: Support cmpxchg on LA32.
+ // i32 @llvm.<name>.i32.<p>(any*, i32, i32, i32, i32 imm);
+ def _i32 : MaskedAtomicRMWFiveArg<llvm_i32_ty>;
// i64 @llvm.<name>.i64.<p>(any*, i64, i64, i64, i64 imm);
def _i64 : MaskedAtomicRMWFiveArg<llvm_i64_ty>;
}
@@ -44,7 +45,7 @@ defm int_loongarch_masked_atomicrmw_umin : MaskedAtomicRMWIntrinsics;
defm int_loongarch_masked_atomicrmw_max : MaskedAtomicRMWFiveOpIntrinsics;
defm int_loongarch_masked_atomicrmw_min : MaskedAtomicRMWFiveOpIntrinsics;
-// @llvm.loongarch.masked.cmpxchg.i64.<p>(
+// @llvm.loongarch.masked.cmpxchg.<i32,i64>.<p>(
// ptr addr, grlen cmpval, grlen newval, grlen mask, grlenimm ordering)
defm int_loongarch_masked_cmpxchg : MaskedAtomicRMWFiveOpIntrinsics;
diff --git a/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp b/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp
index 3be012feb2385..73874fccc0308 100644
--- a/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp
@@ -122,6 +122,18 @@ bool LoongArchExpandAtomicPseudo::expandMI(
case LoongArch::PseudoAtomicLoadXor32:
return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xor, false, 32,
NextMBBI);
+ case LoongArch::PseudoAtomicLoadUMax32:
+ return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMax, false, 32,
+ NextMBBI);
+ case LoongArch::PseudoAtomicLoadUMin32:
+ return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMin, false, 32,
+ NextMBBI);
+ case LoongArch::PseudoAtomicLoadMax32:
+ return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Max, false, 32,
+ NextMBBI);
+ case LoongArch::PseudoAtomicLoadMin32:
+ return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Min, false, 32,
+ NextMBBI);
case LoongArch::PseudoMaskedAtomicLoadUMax32:
return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMax, true, 32,
NextMBBI);
@@ -356,8 +368,6 @@ bool LoongArchExpandAtomicPseudo::expandAtomicMinMaxOp(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width,
MachineBasicBlock::iterator &NextMBBI) {
- assert(IsMasked == true &&
- "Should only need to expand masked atomic max/min");
assert(Width == 32 && "Should never need to expand masked 64-bit operations");
MachineInstr &MI = *MBBI;
@@ -385,79 +395,92 @@ bool LoongArchExpandAtomicPseudo::expandAtomicMinMaxOp(
MBB.addSuccessor(LoopHeadMBB);
Register DestReg = MI.getOperand(0).getReg();
- Register Scratch1Reg = MI.getOperand(1).getReg();
- Register Scratch2Reg = MI.getOperand(2).getReg();
- Register AddrReg = MI.getOperand(3).getReg();
- Register IncrReg = MI.getOperand(4).getReg();
- Register MaskReg = MI.getOperand(5).getReg();
+ Register ScratchReg = MI.getOperand(1).getReg();
+ Register AddrReg = MI.getOperand(IsMasked ? 3 : 2).getReg();
+ Register IncrReg = MI.getOperand(IsMasked ? 4 : 3).getReg();
+ Register CmprReg = DestReg;
//
// .loophead:
// ll.w destreg, (alignedaddr)
- // and scratch2, destreg, mask
- // move scratch1, destreg
BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::LL_W), DestReg)
.addReg(AddrReg)
.addImm(0);
- BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::AND), Scratch2Reg)
- .addReg(DestReg)
- .addReg(MaskReg);
- BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::OR), Scratch1Reg)
+ // and cmpr, destreg, mask
+ if (IsMasked) {
+ Register MaskReg = MI.getOperand(5).getReg();
+ CmprReg = MI.getOperand(2).getReg();
+ BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::AND), CmprReg)
+ .addReg(DestReg)
+ .addReg(MaskReg);
+ }
+ // move scratch, destreg
+ BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::OR), ScratchReg)
.addReg(DestReg)
.addReg(LoongArch::R0);
switch (BinOp) {
default:
llvm_unreachable("Unexpected AtomicRMW BinOp");
- // bgeu scratch2, incr, .looptail
+ // bgeu cmpr, incr, .looptail
case AtomicRMWInst::UMax:
BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::BGEU))
- .addReg(Scratch2Reg)
+ .addReg(CmprReg)
.addReg(IncrReg)
.addMBB(LoopTailMBB);
break;
- // bgeu incr, scratch2, .looptail
+ // bgeu incr, cmpr, .looptail
case AtomicRMWInst::UMin:
BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::BGEU))
.addReg(IncrReg)
- .addReg(Scratch2Reg)
+ .addReg(CmprReg)
.addMBB(LoopTailMBB);
break;
case AtomicRMWInst::Max:
- insertSext(TII, DL, LoopHeadMBB, Scratch2Reg, MI.getOperand(6).getReg());
- // bge scratch2, incr, .looptail
+ if (IsMasked)
+ insertSext(TII, DL, LoopHeadMBB, CmprReg, MI.getOperand(6).getReg());
+ // bge cmpr, incr, .looptail
BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::BGE))
- .addReg(Scratch2Reg)
+ .addReg(CmprReg)
.addReg(IncrReg)
.addMBB(LoopTailMBB);
break;
case AtomicRMWInst::Min:
- insertSext(TII, DL, LoopHeadMBB, Scratch2Reg, MI.getOperand(6).getReg());
- // bge incr, scratch2, .looptail
+ if (IsMasked)
+ insertSext(TII, DL, LoopHeadMBB, CmprReg, MI.getOperand(6).getReg());
+ // bge incr, cmpr, .looptail
BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::BGE))
.addReg(IncrReg)
- .addReg(Scratch2Reg)
+ .addReg(CmprReg)
.addMBB(LoopTailMBB);
break;
// TODO: support other AtomicRMWInst.
}
// .loopifbody:
- // xor scratch1, destreg, incr
- // and scratch1, scratch1, mask
- // xor scratch1, destreg, scratch1
- insertMaskedMerge(TII, DL, LoopIfBodyMBB, Scratch1Reg, DestReg, IncrReg,
- MaskReg, Scratch1Reg);
+ if (IsMasked) {
+ Register MaskReg = MI.getOperand(5).getReg();
+ // xor scratch, destreg, incr
+ // and scratch, scratch, mask
+ // xor scratch, destreg, scratch
+ insertMaskedMerge(TII, DL, LoopIfBodyMBB, ScratchReg, DestReg, IncrReg,
+ MaskReg, ScratchReg);
+ } else {
+ // move scratch, incr
+ BuildMI(LoopIfBodyMBB, DL, TII->get(LoongArch::OR), ScratchReg)
+ .addReg(IncrReg)
+ .addReg(LoongArch::R0);
+ }
// .looptail:
- // sc.w scratch1, scratch1, (addr)
- // beqz scratch1, loop
- BuildMI(LoopTailMBB, DL, TII->get(LoongArch::SC_W), Scratch1Reg)
- .addReg(Scratch1Reg)
+ // sc.w scratch, scratch, (addr)
+ // beqz scratch, loop
+ BuildMI(LoopTailMBB, DL, TII->get(LoongArch::SC_W), ScratchReg)
+ .addReg(ScratchReg)
.addReg(AddrReg)
.addImm(0);
BuildMI(LoopTailMBB, DL, TII->get(LoongArch::BEQ))
- .addReg(Scratch1Reg)
+ .addReg(ScratchReg)
.addReg(LoongArch::R0)
.addMBB(LoopHeadMBB);
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 9f5c94ddea44f..c96be139340f3 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -7070,6 +7070,14 @@ getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen,
return Intrinsic::loongarch_masked_atomicrmw_sub_i32;
case AtomicRMWInst::Nand:
return Intrinsic::loongarch_masked_atomicrmw_nand_i32;
+ case AtomicRMWInst::UMax:
+ return Intrinsic::loongarch_masked_atomicrmw_umax_i32;
+ case AtomicRMWInst::UMin:
+ return Intrinsic::loongarch_masked_atomicrmw_umin_i32;
+ case AtomicRMWInst::Max:
+ return Intrinsic::loongarch_masked_atomicrmw_max_i32;
+ case AtomicRMWInst::Min:
+ return Intrinsic::loongarch_masked_atomicrmw_min_i32;
// TODO: support other AtomicRMWInst.
}
}
@@ -7093,19 +7101,22 @@ LoongArchTargetLowering::shouldExpandAtomicCmpXchgInIR(
Value *LoongArchTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
+ unsigned GRLen = Subtarget.getGRLen();
AtomicOrdering FailOrd = CI->getFailureOrdering();
Value *FailureOrdering =
Builder.getIntN(Subtarget.getGRLen(), static_cast<uint64_t>(FailOrd));
-
- // TODO: Support cmpxchg on LA32.
- Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64;
- CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
- NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
- Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
+ Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i32;
+ if (GRLen == 64) {
+ CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64;
+ CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
+ NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
+ Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
+ }
Type *Tys[] = {AlignedAddr->getType()};
Value *Result = Builder.CreateIntrinsic(
CmpXchgIntrID, Tys, {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering});
- Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
+ if (GRLen == 64)
+ Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
return Result;
}
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
index fcdd9a130d8b6..344f563bd61e8 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
@@ -2055,6 +2055,10 @@ def PseudoAtomicLoadSub32 : PseudoAM;
def PseudoAtomicLoadAnd32 : PseudoAM;
def PseudoAtomicLoadOr32 : PseudoAM;
def PseudoAtomicLoadXor32 : PseudoAM;
+def PseudoAtomicLoadUMax32 : PseudoAM;
+def PseudoAtomicLoadUMin32 : PseudoAM;
+def PseudoAtomicLoadMax32 : PseudoAM;
+def PseudoAtomicLoadMin32 : PseudoAM;
multiclass PseudoBinPat<string Op, Pseudo BinInst> {
def : Pat<(!cast<PatFrag>(Op#"_monotonic") GPR:$addr, GPR:$incr),
@@ -2253,6 +2257,22 @@ def : Pat<(atomic_cmp_swap_i64 GPR:$addr, GPR:$cmp, GPR:$new),
(AMCAS__DB_D GPR:$cmp, GPR:$new, GPR:$addr)>;
}
+// Ordering constants must be kept in sync with the AtomicOrdering enum in
+// AtomicOrdering.h.
+multiclass PseudoCmpXchgPat<string Op, Pseudo CmpXchgInst,
+ ValueType vt = GRLenVT> {
+ def : Pat<(vt (!cast<PatFrag>(Op#"_failure_monotonic") GPR:$addr, GPR:$cmp, GPR:$new)),
+ (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 2)>;
+ def : Pat<(vt (!cast<PatFrag>(Op#"_failure_acquire") GPR:$addr, GPR:$cmp, GPR:$new)),
+ (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 4)>;
+ def : Pat<(vt (!cast<PatFrag>(Op#"_failure_release") GPR:$addr, GPR:$cmp, GPR:$new)),
+ (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 5)>;
+ def : Pat<(vt (!cast<PatFrag>(Op#"_failure_acq_rel") GPR:$addr, GPR:$cmp, GPR:$new)),
+ (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 6)>;
+ def : Pat<(vt (!cast<PatFrag>(Op#"_failure_seq_cst") GPR:$addr, GPR:$cmp, GPR:$new)),
+ (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 7)>;
+}
+
let Predicates = [IsLA64] in {
defm : binary_atomic_op_wd<"AMSWAP", "atomic_swap">;
defm : binary_atomic_op_wd<"AMADD", "atomic_load_add">;
@@ -2288,23 +2308,6 @@ def : AtomicPat<int_loongarch_masked_atomicrmw_umax_i64,
def : AtomicPat<int_loongarch_masked_atomicrmw_umin_i64,
PseudoMaskedAtomicLoadUMin32>;
-// Ordering constants must be kept in sync with the AtomicOrdering enum in
-// AtomicOrdering.h.
-multiclass PseudoCmpXchgPat<string Op, Pseudo CmpXchgInst,
- ValueType vt = GRLenVT> {
- def : Pat<(vt (!cast<PatFrag>(Op#"_failure_monotonic") GPR:$addr, GPR:$cmp, GPR:$new)),
- (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 2)>;
- def : Pat<(vt (!cast<PatFrag>(Op#"_failure_acquire") GPR:$addr, GPR:$cmp, GPR:$new)),
- (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 4)>;
- def : Pat<(vt (!cast<PatFrag>(Op#"_failure_release") GPR:$addr, GPR:$cmp, GPR:$new)),
- (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 5)>;
- def : Pat<(vt (!cast<PatFrag>(Op#"_failure_acq_rel") GPR:$addr, GPR:$cmp, GPR:$new)),
- (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 6)>;
- def : Pat<(vt (!cast<PatFrag>(Op#"_failure_seq_cst") GPR:$addr, GPR:$cmp, GPR:$new)),
- (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 7)>;
-}
-
-defm : PseudoCmpXchgPat<"atomic_cmp_swap_i32", PseudoCmpXchg32>;
defm : PseudoCmpXchgPat<"atomic_cmp_swap_i64", PseudoCmpXchg64, i64>;
def : Pat<(int_loongarch_masked_cmpxchg_i64
GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$fail_order),
@@ -2317,6 +2320,7 @@ def : PseudoMaskedAMMinMaxPat<int_loongarch_masked_atomicrmw_min_i64,
PseudoMaskedAtomicLoadMin32>;
} // Predicates = [IsLA64]
+defm : PseudoCmpXchgPat<"atomic_cmp_swap_i32", PseudoCmpXchg32>;
defm : PseudoBinPat<"atomic_load_nand_i32", PseudoAtomicLoadNand32>;
let Predicates = [IsLA32] in {
@@ -2329,11 +2333,30 @@ def : AtomicPat<int_loongarch_masked_atomicrmw_sub_i32,
PseudoMaskedAtomicLoadSub32>;
def : AtomicPat<int_loongarch_masked_atomicrmw_nand_i32,
PseudoMaskedAtomicLoadNand32>;
+def : AtomicPat<int_loongarch_masked_atomicrmw_umax_i32,
+ PseudoMaskedAtomicLoadUMax32>;
+def : AtomicPat<int_loongarch_masked_atomicrmw_umin_i32,
+ PseudoMaskedAtomicLoadUMin32>;
+
+def : PseudoMaskedAMMinMaxPat<int_loongarch_masked_atomicrmw_max_i32,
+ PseudoMaskedAtomicLoadMax32>;
+def : PseudoMaskedAMMinMaxPat<int_loongarch_masked_atomicrmw_min_i32,
+ PseudoMaskedAtomicLoadMin32>;
+
+def : Pat<(int_loongarch_masked_cmpxchg_i32
+ GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$fail_order),
+ (PseudoMaskedCmpXchg32
+ GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$fail_order)>;
+
defm : PseudoBinPat<"atomic_load_add_i32", PseudoAtomicLoadAdd32>;
defm : PseudoBinPat<"atomic_load_sub_i32", PseudoAtomicLoadSub32>;
defm : PseudoBinPat<"atomic_load_and_i32", PseudoAtomicLoadAnd32>;
defm : PseudoBinPat<"atomic_load_or_i32", PseudoAtomicLoadOr32>;
defm : PseudoBinPat<"atomic_load_xor_i32", PseudoAtomicLoadXor32>;
+defm : PseudoBinPat<"atomic_load_umax_i32", PseudoAtomicLoadUMax32>;
+defm : PseudoBinPat<"atomic_load_umin_i32", PseudoAtomicLoadUMin32>;
+defm : PseudoBinPat<"atomic_load_max_i32", PseudoAtomicLoadMax32>;
+defm : PseudoBinPat<"atomic_load_min_i32", PseudoAtomicLoadMin32>;
} // Predicates = [IsLA32]
/// Intrinsics
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll
index 096c2242661c0..a6a0f15f9f4a5 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll
@@ -1,10 +1,37 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch32 -mattr=+d --verify-machineinstrs < %s | \
+; RUN: FileCheck %s --check-prefix=LA32
; RUN: llc --mtriple=loongarch64 -mattr=+d --verify-machineinstrs < %s | \
; RUN: FileCheck %s --check-prefix=LA64
;; TODO: Testing for LA32 architecture will be added later
define i8 @atomicrmw_umax_i8_acquire(ptr %a, i8 %b) nounwind {
+; LA32-LABEL: atomicrmw_umax_i8_acquire:
+; LA32: # %bb.0:
+; LA32-NEXT: addi.w $a2, $zero, -4
+; LA32-NEXT: and $a2, $a0, $a2
+; LA32-NEXT: slli.w $a0, $a0, 3
+; LA32-NEXT: ori $a3, $zero, 255
+; LA32-NEXT: sll.w $a3, $a3, $a0
+; LA32-NEXT: andi $a1, $a1, 255
+; LA32-NEXT: sll.w $a1, $a1, $a0
+; LA32-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1
+; LA32-NEXT: ll.w $a4, $a2, 0
+; LA32-NEXT: and $a6, $a4, $a3
+; LA32-NEXT: move $a5, $a4
+; LA32-NEXT: bgeu $a6, $a1, .LBB0_3
+; LA32-NEXT: # %bb.2: # in Loop: Header=BB0_1 Depth=1
+; LA32-NEXT: xor $a5, $a4, $a1
+; LA32-NEXT: and $a5, $a5, $a3
+; LA32-NEXT: xor $a5, $a4, $a5
+; LA32-NEXT: .LBB0_3: # in Loop: Header=BB0_1 Depth=1
+; LA32-NEXT: sc.w $a5, $a2, 0
+; LA32-NEXT: beq $a5, $zero, .LBB0_1
+; LA32-NEXT: # %bb.4:
+; LA32-NEXT: srl.w $a0, $a4, $a0
+; LA32-NEXT: ret
+;
; LA64-LABEL: atomicrmw_umax_i8_acquire:
; LA64: # %bb.0:
; LA64-NEXT: slli.d $a2, $a0, 3
@@ -33,6 +60,32 @@ define i8 @atomicrmw_umax_i8_acquire(ptr %a, i8 %b) nounwind {
}
define i16 @atomicrmw_umax_i16_acquire(ptr %a, i16 %b) nounwind {
+; LA32-LABEL: atomicrmw_umax_i16_acquire:
+; LA32: # %bb.0:
+; LA32-NEXT: addi.w $a2, $zero, -4
+; LA32-NEXT: and $a2, $a0, $a2
+; LA32-NEXT: slli.w $a0, $a0, 3
+; LA32-NEXT: lu12i.w $a3, 15
+; LA32-NEXT: ori $a3, $a3, 4095
+; LA32-NEXT: sll.w $a4, $a3, $a0
+; LA32-NEXT: and $a1, $a1, $a3
+; LA32-NEXT: sll.w $a1, $a1, $a0
+; LA32-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1
+; LA32-NEXT: ll.w $a3, $a2, 0
+; LA32-NEXT: and $a6, $a3, $a4
+; LA32-NEXT: move $a5, $a3
+; LA32-NEXT: bgeu $a6, $a1, .LBB1_3
+; LA32-NEXT: # %bb.2: # in Loop: Header=BB1_1 Depth=1
+; LA32-NEXT: xor $a5, $a3, $a1
+; LA32-NEXT: and $a5, $a5, $a4
+; LA32-NEXT: xor $a5, $a3, $a5
+; LA32-NEXT: .LBB1_3: # in Loop: Header=BB1_1 Depth=1
+; LA32-NEXT: sc.w $a5, $a2, 0
+; LA32-NEXT: beq $a5, $zero, .LBB1_1
+; LA32-NEXT: # %bb.4:
+; LA32-NEXT: srl.w $a0, $a3, $a0
+; LA32-NEXT: ret
+;
; LA64-LABEL: atomicrmw_umax_i16_acquire:
; LA64: # %bb.0:
; LA64-NEXT: slli.d $a2, $a0, 3
@@ -62,6 +115,21 @@ define i16 @atomicrmw_umax_i16_acquire(ptr %a, i16 %b) nounwind {
}
define i32 @atomicrmw_umax_i32_acquire(ptr %a, i32 %b) nounwind {
+; LA32-LABEL: atomicrmw_umax_i32_acquire:
+; LA32: # %bb.0:
+; LA32-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1
+; LA32-NEXT: ll.w $a2, $a0, 0
+; LA32-NEXT: move $a3, $a2
+; LA32-NEXT: bgeu $a2, $a1, .LBB2_3
+; LA32-NEXT: # %bb.2: # in Loop: Header=BB2_1 Depth=1
+; LA32-NEXT: move $a3, $a1
+; LA32-NEXT: .LBB2_3: # in Loop: Header=BB2_1 Depth=1
+; LA32-NEXT: sc.w $a3, $a0, 0
+; LA32-NEXT: beq $a3, $zero, .LBB2_1
+; LA32-NEXT: # %bb.4:
+; LA32-NEXT: move $a0, $a2
+; LA32-NEXT: ret
+;
; LA64-LABEL: atomicrmw_umax_i32_acquire:
; LA64: # %bb.0:
; LA64-NEXT: ammax_db.wu $a2, $a1, $a0
@@ -72,6 +140,65 @@ define i32 @atomicrmw_umax_i32_acquire(ptr %a, i32 %b) nounwind {
}
define i64 @atomicrmw_umax_i64_acquire(ptr %a, i64 %b) nounwind {
+; LA32-LABEL: atomicrmw_umax_i64_acquire:
+; LA32: # %bb.0:
+; LA32-NEXT: addi.w $sp, $sp, -32
+; LA32-NEXT: st.w $ra, $sp, 28 # 4-byte Folded Spill
+; LA32-NEXT: st.w $fp, $sp, 24 # 4-byte Folded Spill
+; LA32-NEXT: st.w $s0, $sp, 20 # 4-byte Folded Spill
+; LA32-NEXT: st.w $s1, $sp, 16 # 4-byte Folded Spill
+; LA32-NEXT: st.w $s2, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: move $fp, $a0
+; LA32-NEXT: ld.w $a5, $a0, 4
+; LA32-NEXT: ld.w $a4, $a0, 0
+; LA32-NEXT: move $s0, $a2
+; LA32-NEXT: move $s1, $a1
+; LA32-NEXT: addi.w $s2, $sp, 0
+; LA32-NEXT: b .LBB3_2
+; LA32-NEXT: .p2align 4, , 16
+; LA32-NEXT: .LBB3_1: # %atomicrmw.start
+; LA32-NEXT: # in Loop: Header=BB3_2 Depth=1
+; LA32-NEXT: st.w $a4, $sp, 0
+; LA32-NEXT: st.w $a5, $sp, 4
+; LA32-NEXT: ori $a4, $zero, 2
+; LA32-NEXT: ori $a5, $zero, 2
+; LA32-NEXT: move $a0, $fp
+; LA32-NEXT: move $a1, $s2
+; LA32-NEXT: bl __atomic_compare_exchange_8
+; LA32-NEXT: ld.w $a5, $sp, 4
+; LA32-NEXT: ld.w $a4, $sp, 0
+; LA32-NEXT: bne $a0, $zero, .LBB3_7
+; LA32-NEXT: .LBB3_2: # %atomicrmw.start
+; LA32-NEXT: # =>This Inner Loop Header: Depth=1
+; ...
[truncated]
``````````
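As a rough usage sketch (hedged; function names are illustrative and the exact expansion depends on shouldExpandAtomicCmpXchgInIR), cmpxchg IR like the following can now be handled on LA32: the word-sized case matches PseudoCmpXchg32, which this patch moves out of the IsLA64-only block, and the sub-word case can go through the new `llvm.loongarch.masked.cmpxchg.i32` path:

```llvm
; Illustrative IR only -- not taken from the patch's test files.
define i32 @cmpxchg_i32(ptr %p, i32 %cmp, i32 %new) {
  ; Word-sized cmpxchg now matches the PseudoCmpXchg32 patterns on LA32 as well.
  %pair = cmpxchg ptr %p, i32 %cmp, i32 %new acquire acquire
  %old = extractvalue { i32, i1 } %pair, 0
  ret i32 %old
}

define i8 @cmpxchg_i8(ptr %p, i8 %cmp, i8 %new) {
  ; Sub-word cmpxchg is expected to be expanded via the masked path
  ; (llvm.loongarch.masked.cmpxchg.i32) introduced by this patch.
  %pair = cmpxchg ptr %p, i8 %cmp, i8 %new acquire monotonic
  %old = extractvalue { i8, i1 } %pair, 0
  ret i8 %old
}
```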
https://github.com/llvm/llvm-project/pull/141557