[llvm] bba97c3 - [LoongArch] Add codegen support for cmpxchg on LA64
Weining Lu via llvm-commits
llvm-commits@lists.llvm.org
Thu Oct 27 06:22:22 PDT 2022
Author: gonglingqin
Date: 2022-10-27T21:18:17+08:00
New Revision: bba97c3c03a9e4adb09470295b013652137e9490
URL: https://github.com/llvm/llvm-project/commit/bba97c3c03a9e4adb09470295b013652137e9490
DIFF: https://github.com/llvm/llvm-project/commit/bba97c3c03a9e4adb09470295b013652137e9490.diff
LOG: [LoongArch] Add codegen support for cmpxchg on LA64
Differential Revision: https://reviews.llvm.org/D135948
Added:
llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll
Modified:
llvm/include/llvm/IR/IntrinsicsLoongArch.td
llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp
llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
llvm/lib/Target/LoongArch/LoongArchISelLowering.h
llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
Removed:
################################################################################
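With this change, LL/SC-based code is generated for cmpxchg at all four widths on LA64. For example, IR along the lines of the new test (a minimal sketch; function and value names are illustrative) now lowers to an ll.w/sc.w retry loop:

define i32 @cas_i32_acquire(ptr %p, i32 %cmp, i32 %new) nounwind {
  %pair = cmpxchg ptr %p, i32 %cmp, i32 %new acquire acquire
  %old = extractvalue { i32, i1 } %pair, 0
  ret i32 %old
}

8- and 16-bit operands are first narrowed by the atomic expansion pass into a masked 32-bit sequence using the new llvm.loongarch.masked.cmpxchg.i64 intrinsic, as the diffs below show.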
diff --git a/llvm/include/llvm/IR/IntrinsicsLoongArch.td b/llvm/include/llvm/IR/IntrinsicsLoongArch.td
index 9f25b6d87354a..849a616908350 100644
--- a/llvm/include/llvm/IR/IntrinsicsLoongArch.td
+++ b/llvm/include/llvm/IR/IntrinsicsLoongArch.td
@@ -29,10 +29,20 @@ multiclass MaskedAtomicRMWIntrinsics {
def _i64 : MaskedAtomicRMW<llvm_i64_ty>;
}
+multiclass MaskedAtomicRMWFiveOpIntrinsics {
+ // TODO: Support cmpxchg on LA32.
+ // i64 @llvm.<name>.i64.<p>(any*, i64, i64, i64, i64 imm);
+ def _i64 : MaskedAtomicRMWFiveArg<llvm_i64_ty>;
+}
+
defm int_loongarch_masked_atomicrmw_xchg : MaskedAtomicRMWIntrinsics;
defm int_loongarch_masked_atomicrmw_add : MaskedAtomicRMWIntrinsics;
defm int_loongarch_masked_atomicrmw_sub : MaskedAtomicRMWIntrinsics;
defm int_loongarch_masked_atomicrmw_nand : MaskedAtomicRMWIntrinsics;
defm int_loongarch_masked_atomicrmw_umax : MaskedAtomicRMWIntrinsics;
defm int_loongarch_masked_atomicrmw_umin : MaskedAtomicRMWIntrinsics;
+
+// @llvm.loongarch.masked.cmpxchg.i64.<p>(
+// ptr addr, grlen cmpval, grlen newval, grlen mask, grlenimm ordering)
+defm int_loongarch_masked_cmpxchg : MaskedAtomicRMWFiveOpIntrinsics;
} // TargetPrefix = "loongarch"
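For reference, the declaration implied by the comment above, with the operand roles spelled out (a sketch; the .p0 pointer-type suffix is an assumption for the overloaded pointer parameter):

declare i64 @llvm.loongarch.masked.cmpxchg.i64.p0(
    ptr,   ; address of the aligned 32-bit container word
    i64,   ; cmpval, already shifted into the target byte lane
    i64,   ; newval, already shifted into the target byte lane
    i64,   ; mask selecting the 8- or 16-bit lane
    i64)   ; ordering, passed as an immediate AtomicOrdering value

The return value is the full container word that was loaded, from which the caller extracts the narrow result.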
diff --git a/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp b/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp
index 803e02f258dc1..5640e16f1f70c 100644
--- a/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp
@@ -55,6 +55,9 @@ class LoongArchExpandAtomicPseudo : public MachineFunctionPass {
MachineBasicBlock::iterator MBBI,
AtomicRMWInst::BinOp, bool IsMasked, int Width,
MachineBasicBlock::iterator &NextMBBI);
+ bool expandAtomicCmpXchg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, bool IsMasked,
+ int Width, MachineBasicBlock::iterator &NextMBBI);
};
char LoongArchExpandAtomicPseudo::ID = 0;
@@ -124,6 +127,12 @@ bool LoongArchExpandAtomicPseudo::expandMI(
case LoongArch::PseudoMaskedAtomicLoadUMin32:
return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMin, true, 32,
NextMBBI);
+ case LoongArch::PseudoCmpXchg32:
+ return expandAtomicCmpXchg(MBB, MBBI, false, 32, NextMBBI);
+ case LoongArch::PseudoCmpXchg64:
+ return expandAtomicCmpXchg(MBB, MBBI, false, 64, NextMBBI);
+ case LoongArch::PseudoMaskedCmpXchg32:
+ return expandAtomicCmpXchg(MBB, MBBI, true, 32, NextMBBI);
}
return false;
}
@@ -432,6 +441,131 @@ bool LoongArchExpandAtomicPseudo::expandAtomicMinMaxOp(
return true;
}
+bool LoongArchExpandAtomicPseudo::expandAtomicCmpXchg(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, bool IsMasked,
+ int Width, MachineBasicBlock::iterator &NextMBBI) {
+ MachineInstr &MI = *MBBI;
+ DebugLoc DL = MI.getDebugLoc();
+ MachineFunction *MF = MBB.getParent();
+ auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+ auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+ auto TailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+ auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+
+ // Insert new MBBs.
+ MF->insert(++MBB.getIterator(), LoopHeadMBB);
+ MF->insert(++LoopHeadMBB->getIterator(), LoopTailMBB);
+ MF->insert(++LoopTailMBB->getIterator(), TailMBB);
+ MF->insert(++TailMBB->getIterator(), DoneMBB);
+
+ // Set up successors and transfer remaining instructions to DoneMBB.
+ LoopHeadMBB->addSuccessor(LoopTailMBB);
+ LoopHeadMBB->addSuccessor(TailMBB);
+ LoopTailMBB->addSuccessor(DoneMBB);
+ LoopTailMBB->addSuccessor(LoopHeadMBB);
+ TailMBB->addSuccessor(DoneMBB);
+ DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
+ DoneMBB->transferSuccessors(&MBB);
+ MBB.addSuccessor(LoopHeadMBB);
+
+ Register DestReg = MI.getOperand(0).getReg();
+ Register ScratchReg = MI.getOperand(1).getReg();
+ Register AddrReg = MI.getOperand(2).getReg();
+ Register CmpValReg = MI.getOperand(3).getReg();
+ Register NewValReg = MI.getOperand(4).getReg();
+
+ if (!IsMasked) {
+ // .loophead:
+ // ll.[w|d] dest, (addr)
+ // bne dest, cmpval, tail
+ BuildMI(LoopHeadMBB, DL,
+ TII->get(Width == 32 ? LoongArch::LL_W : LoongArch::LL_D), DestReg)
+ .addReg(AddrReg)
+ .addImm(0);
+ BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::BNE))
+ .addReg(DestReg)
+ .addReg(CmpValReg)
+ .addMBB(TailMBB);
+ // .looptail:
+ // dbar 0
+ // move scratch, newval
+ // sc.[w|d] scratch, scratch, (addr)
+ // beqz scratch, loophead
+ // b done
+ BuildMI(LoopTailMBB, DL, TII->get(LoongArch::DBAR)).addImm(0);
+ BuildMI(LoopTailMBB, DL, TII->get(LoongArch::OR), ScratchReg)
+ .addReg(NewValReg)
+ .addReg(LoongArch::R0);
+ BuildMI(LoopTailMBB, DL,
+ TII->get(Width == 32 ? LoongArch::SC_W : LoongArch::SC_D),
+ ScratchReg)
+ .addReg(ScratchReg)
+ .addReg(AddrReg)
+ .addImm(0);
+ BuildMI(LoopTailMBB, DL, TII->get(LoongArch::BEQZ))
+ .addReg(ScratchReg)
+ .addMBB(LoopHeadMBB);
+ BuildMI(LoopTailMBB, DL, TII->get(LoongArch::B)).addMBB(DoneMBB);
+ } else {
+ // .loophead:
+ // ll.[w|d] dest, (addr)
+ // and scratch, dest, mask
+ // bne scratch, cmpval, tail
+ Register MaskReg = MI.getOperand(5).getReg();
+ BuildMI(LoopHeadMBB, DL,
+ TII->get(Width == 32 ? LoongArch::LL_W : LoongArch::LL_D), DestReg)
+ .addReg(AddrReg)
+ .addImm(0);
+ BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::AND), ScratchReg)
+ .addReg(DestReg)
+ .addReg(MaskReg);
+ BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::BNE))
+ .addReg(ScratchReg)
+ .addReg(CmpValReg)
+ .addMBB(TailMBB);
+
+ // .looptail:
+ // dbar 0
+ // andn scratch, dest, mask
+ // or scratch, scratch, newval
+ // sc.[w|d] scratch, scratch, (addr)
+ // beqz scratch, loophead
+ // b done
+ BuildMI(LoopTailMBB, DL, TII->get(LoongArch::DBAR)).addImm(0);
+ BuildMI(LoopTailMBB, DL, TII->get(LoongArch::ANDN), ScratchReg)
+ .addReg(DestReg)
+ .addReg(MaskReg);
+ BuildMI(LoopTailMBB, DL, TII->get(LoongArch::OR), ScratchReg)
+ .addReg(ScratchReg)
+ .addReg(NewValReg);
+ BuildMI(LoopTailMBB, DL,
+ TII->get(Width == 32 ? LoongArch::SC_W : LoongArch::SC_D),
+ ScratchReg)
+ .addReg(ScratchReg)
+ .addReg(AddrReg)
+ .addImm(0);
+ BuildMI(LoopTailMBB, DL, TII->get(LoongArch::BEQZ))
+ .addReg(ScratchReg)
+ .addMBB(LoopHeadMBB);
+ BuildMI(LoopTailMBB, DL, TII->get(LoongArch::B)).addMBB(DoneMBB);
+ }
+
+ // .tail:
+ // dbar 0x700
+ BuildMI(TailMBB, DL, TII->get(LoongArch::DBAR)).addImm(0x700);
+
+ NextMBBI = MBB.end();
+ MI.eraseFromParent();
+
+ LivePhysRegs LiveRegs;
+ computeAndAddLiveIns(LiveRegs, *LoopHeadMBB);
+ computeAndAddLiveIns(LiveRegs, *LoopTailMBB);
+ computeAndAddLiveIns(LiveRegs, *TailMBB);
+ computeAndAddLiveIns(LiveRegs, *DoneMBB);
+
+ return true;
+}
+
} // end namespace
INITIALIZE_PASS(LoongArchExpandAtomicPseudo, "loongarch-expand-atomic-pseudo",
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 62923ba0d3c6d..f99764fb80225 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -2084,6 +2084,35 @@ getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen,
llvm_unreachable("Unexpected GRLen\n");
}
+TargetLowering::AtomicExpansionKind
+LoongArchTargetLowering::shouldExpandAtomicCmpXchgInIR(
+ AtomicCmpXchgInst *CI) const {
+ unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
+ if (Size == 8 || Size == 16)
+ return AtomicExpansionKind::MaskedIntrinsic;
+ return AtomicExpansionKind::None;
+}
+
+Value *LoongArchTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
+ IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
+ Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
+ Value *Ordering =
+ Builder.getIntN(Subtarget.getGRLen(), static_cast<uint64_t>(Ord));
+
+ // TODO: Support cmpxchg on LA32.
+ Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64;
+ CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
+ NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
+ Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
+ Type *Tys[] = {AlignedAddr->getType()};
+ Function *MaskedCmpXchg =
+ Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
+ Value *Result = Builder.CreateCall(
+ MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
+ Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
+ return Result;
+}
+
Value *LoongArchTargetLowering::emitMaskedAtomicRMWIntrinsic(
IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
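Putting the two hooks together, an i8 acquire cmpxchg is rewritten by atomic expansion into roughly the following before instruction selection (an illustrative sketch only: the .p0 suffix and the ordering immediate 4 for acquire are assumptions, and the real pass builds the equivalent through i32 intermediates with different value names):

define i8 @cas_i8_acquire_sketch(ptr %p, i8 %cmp, i8 %new) nounwind {
  ; Compute the aligned 32-bit container word and the bit offset of the byte.
  %iptr = ptrtoint ptr %p to i64
  %waddr = and i64 %iptr, -4
  %wptr = inttoptr i64 %waddr to ptr
  %byteofs = and i64 %iptr, 3
  %shamt = shl i64 %byteofs, 3
  ; Shift the mask and both operands into the selected byte lane.
  %mask = shl i64 255, %shamt
  %cmp64 = zext i8 %cmp to i64
  %cmps = shl i64 %cmp64, %shamt
  %new64 = zext i8 %new to i64
  %news = shl i64 %new64, %shamt
  %old = call i64 @llvm.loongarch.masked.cmpxchg.i64.p0(ptr %wptr, i64 %cmps, i64 %news, i64 %mask, i64 4)
  ; Extract the original byte from the returned container word.
  %oldsh = lshr i64 %old, %shamt
  %res = trunc i64 %oldsh to i8
  ret i8 %res
}

declare i64 @llvm.loongarch.masked.cmpxchg.i64.p0(ptr, i64, i64, i64, i64)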
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index ca6de97a67d08..1e411fb34f727 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -109,6 +109,13 @@ class LoongArchTargetLowering : public TargetLowering {
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
EVT VT) const override;
+ TargetLowering::AtomicExpansionKind
+ shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override;
+ Value *emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder,
+ AtomicCmpXchgInst *CI,
+ Value *AlignedAddr, Value *CmpVal,
+ Value *NewVal, Value *Mask,
+ AtomicOrdering Ord) const override;
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
MachineFunction &MF,
@@ -123,6 +130,10 @@ class LoongArchTargetLowering : public TargetLowering {
Register
getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
+ ISD::NodeType getExtendForAtomicOps() const override {
+ return ISD::SIGN_EXTEND;
+ }
+
private:
/// Target-specific function used to lower LoongArch calling conventions.
typedef bool LoongArchCCAssignFn(const DataLayout &DL, LoongArchABI::ABI ABI,
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
index e3ac862bb93e5..0bac2222d3574 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
@@ -1180,6 +1180,30 @@ class PseudoMaskedAMUMinUMax
def PseudoMaskedAtomicLoadUMax32 : PseudoMaskedAMUMinUMax;
def PseudoMaskedAtomicLoadUMin32 : PseudoMaskedAMUMinUMax;
+/// Compare and exchange
+
+class PseudoCmpXchg
+ : Pseudo<(outs GPR:$res, GPR:$scratch),
+ (ins GPR:$addr, GPR:$cmpval, GPR:$newval), []> {
+ let Constraints = "@earlyclobber $res,@earlyclobber $scratch";
+ let mayLoad = 1;
+ let mayStore = 1;
+ let hasSideEffects = 0;
+}
+
+def PseudoCmpXchg32 : PseudoCmpXchg;
+def PseudoCmpXchg64 : PseudoCmpXchg;
+
+def PseudoMaskedCmpXchg32
+ : Pseudo<(outs GPR:$res, GPR:$scratch),
+ (ins GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask,
+ grlenimm:$ordering), []> {
+ let Constraints = "@earlyclobber $res,@earlyclobber $scratch";
+ let mayLoad = 1;
+ let mayStore = 1;
+ let hasSideEffects = 0;
+}
+
class AtomicPat<Intrinsic intrin, Pseudo AMInst>
: Pat<(intrin GPR:$addr, GPR:$incr, GPR:$mask, timm:$ordering),
(AMInst GPR:$addr, GPR:$incr, GPR:$mask, timm:$ordering)>;
@@ -1233,6 +1257,15 @@ def : AtomicPat<int_loongarch_masked_atomicrmw_umax_i64,
PseudoMaskedAtomicLoadUMax32>;
def : AtomicPat<int_loongarch_masked_atomicrmw_umin_i64,
PseudoMaskedAtomicLoadUMin32>;
+
+def : Pat<(atomic_cmp_swap_64 GPR:$addr, GPR:$cmp, GPR:$new),
+ (PseudoCmpXchg64 GPR:$addr, GPR:$cmp, GPR:$new)>;
+def : Pat<(int_loongarch_masked_cmpxchg_i64
+ GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering),
+ (PseudoMaskedCmpXchg32
+ GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering)>;
+def : Pat<(atomic_cmp_swap_32 GPR:$addr, GPR:$cmp, GPR:$new),
+ (PseudoCmpXchg32 GPR:$addr, GPR:$cmp, GPR:$new)>;
} // Predicates = [IsLA64]
def : Pat<(atomic_load_nand_32 GPR:$rj, GPR:$rk),
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll
new file mode 100644
index 0000000000000..b8c0cb257122a
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll
@@ -0,0 +1,356 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64
+
+define void @cmpxchg_i8_acquire_acquire(ptr %ptr, i8 %cmp, i8 %val) nounwind {
+; LA64-LABEL: cmpxchg_i8_acquire_acquire:
+; LA64: # %bb.0:
+; LA64-NEXT: addi.w $a3, $zero, -4
+; LA64-NEXT: and $a3, $a0, $a3
+; LA64-NEXT: slli.d $a0, $a0, 3
+; LA64-NEXT: andi $a1, $a1, 255
+; LA64-NEXT: sll.w $a1, $a1, $a0
+; LA64-NEXT: andi $a2, $a2, 255
+; LA64-NEXT: sll.w $a2, $a2, $a0
+; LA64-NEXT: ori $a4, $zero, 255
+; LA64-NEXT: sll.w $a0, $a4, $a0
+; LA64-NEXT: addi.w $a0, $a0, 0
+; LA64-NEXT: addi.w $a2, $a2, 0
+; LA64-NEXT: addi.w $a1, $a1, 0
+; LA64-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a4, $a3, 0
+; LA64-NEXT: and $a5, $a4, $a0
+; LA64-NEXT: bne $a5, $a1, .LBB0_3
+; LA64-NEXT: # %bb.2: # in Loop: Header=BB0_1 Depth=1
+; LA64-NEXT: dbar 0
+; LA64-NEXT: andn $a5, $a4, $a0
+; LA64-NEXT: or $a5, $a5, $a2
+; LA64-NEXT: sc.w $a5, $a3, 0
+; LA64-NEXT: beqz $a5, .LBB0_1
+; LA64-NEXT: b .LBB0_4
+; LA64-NEXT: .LBB0_3:
+; LA64-NEXT: dbar 1792
+; LA64-NEXT: .LBB0_4:
+; LA64-NEXT: ret
+ %res = cmpxchg ptr %ptr, i8 %cmp, i8 %val acquire acquire
+ ret void
+}
+
+define void @cmpxchg_i16_acquire_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind {
+; LA64-LABEL: cmpxchg_i16_acquire_acquire:
+; LA64: # %bb.0:
+; LA64-NEXT: addi.w $a3, $zero, -4
+; LA64-NEXT: and $a3, $a0, $a3
+; LA64-NEXT: slli.d $a0, $a0, 3
+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT: sll.w $a1, $a1, $a0
+; LA64-NEXT: bstrpick.d $a2, $a2, 15, 0
+; LA64-NEXT: sll.w $a2, $a2, $a0
+; LA64-NEXT: lu12i.w $a4, 15
+; LA64-NEXT: ori $a4, $a4, 4095
+; LA64-NEXT: sll.w $a0, $a4, $a0
+; LA64-NEXT: addi.w $a0, $a0, 0
+; LA64-NEXT: addi.w $a2, $a2, 0
+; LA64-NEXT: addi.w $a1, $a1, 0
+; LA64-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a4, $a3, 0
+; LA64-NEXT: and $a5, $a4, $a0
+; LA64-NEXT: bne $a5, $a1, .LBB1_3
+; LA64-NEXT: # %bb.2: # in Loop: Header=BB1_1 Depth=1
+; LA64-NEXT: dbar 0
+; LA64-NEXT: andn $a5, $a4, $a0
+; LA64-NEXT: or $a5, $a5, $a2
+; LA64-NEXT: sc.w $a5, $a3, 0
+; LA64-NEXT: beqz $a5, .LBB1_1
+; LA64-NEXT: b .LBB1_4
+; LA64-NEXT: .LBB1_3:
+; LA64-NEXT: dbar 1792
+; LA64-NEXT: .LBB1_4:
+; LA64-NEXT: ret
+ %res = cmpxchg ptr %ptr, i16 %cmp, i16 %val acquire acquire
+ ret void
+}
+
+define void @cmpxchg_i32_acquire_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind {
+; LA64-LABEL: cmpxchg_i32_acquire_acquire:
+; LA64: # %bb.0:
+; LA64-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a3, $a0, 0
+; LA64-NEXT: bne $a3, $a1, .LBB2_3
+; LA64-NEXT: # %bb.2: # in Loop: Header=BB2_1 Depth=1
+; LA64-NEXT: dbar 0
+; LA64-NEXT: move $a4, $a2
+; LA64-NEXT: sc.w $a4, $a0, 0
+; LA64-NEXT: beqz $a4, .LBB2_1
+; LA64-NEXT: b .LBB2_4
+; LA64-NEXT: .LBB2_3:
+; LA64-NEXT: dbar 1792
+; LA64-NEXT: .LBB2_4:
+; LA64-NEXT: ret
+ %res = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire acquire
+ ret void
+}
+
+define void @cmpxchg_i64_acquire_acquire(ptr %ptr, i64 %cmp, i64 %val) nounwind {
+; LA64-LABEL: cmpxchg_i64_acquire_acquire:
+; LA64: # %bb.0:
+; LA64-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.d $a3, $a0, 0
+; LA64-NEXT: bne $a3, $a1, .LBB3_3
+; LA64-NEXT: # %bb.2: # in Loop: Header=BB3_1 Depth=1
+; LA64-NEXT: dbar 0
+; LA64-NEXT: move $a4, $a2
+; LA64-NEXT: sc.d $a4, $a0, 0
+; LA64-NEXT: beqz $a4, .LBB3_1
+; LA64-NEXT: b .LBB3_4
+; LA64-NEXT: .LBB3_3:
+; LA64-NEXT: dbar 1792
+; LA64-NEXT: .LBB3_4:
+; LA64-NEXT: ret
+ %res = cmpxchg ptr %ptr, i64 %cmp, i64 %val acquire acquire
+ ret void
+}
+
+define i8 @cmpxchg_i8_acquire_acquire_reti8(ptr %ptr, i8 %cmp, i8 %val) nounwind {
+; LA64-LABEL: cmpxchg_i8_acquire_acquire_reti8:
+; LA64: # %bb.0:
+; LA64-NEXT: addi.w $a3, $zero, -4
+; LA64-NEXT: and $a3, $a0, $a3
+; LA64-NEXT: slli.d $a0, $a0, 3
+; LA64-NEXT: ori $a4, $zero, 255
+; LA64-NEXT: sll.w $a4, $a4, $a0
+; LA64-NEXT: addi.w $a4, $a4, 0
+; LA64-NEXT: andi $a2, $a2, 255
+; LA64-NEXT: sll.w $a2, $a2, $a0
+; LA64-NEXT: addi.w $a2, $a2, 0
+; LA64-NEXT: andi $a1, $a1, 255
+; LA64-NEXT: sll.w $a1, $a1, $a0
+; LA64-NEXT: addi.w $a1, $a1, 0
+; LA64-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a5, $a3, 0
+; LA64-NEXT: and $a6, $a5, $a4
+; LA64-NEXT: bne $a6, $a1, .LBB4_3
+; LA64-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1
+; LA64-NEXT: dbar 0
+; LA64-NEXT: andn $a6, $a5, $a4
+; LA64-NEXT: or $a6, $a6, $a2
+; LA64-NEXT: sc.w $a6, $a3, 0
+; LA64-NEXT: beqz $a6, .LBB4_1
+; LA64-NEXT: b .LBB4_4
+; LA64-NEXT: .LBB4_3:
+; LA64-NEXT: dbar 1792
+; LA64-NEXT: .LBB4_4:
+; LA64-NEXT: srl.w $a0, $a5, $a0
+; LA64-NEXT: ret
+ %tmp = cmpxchg ptr %ptr, i8 %cmp, i8 %val acquire acquire
+ %res = extractvalue { i8, i1 } %tmp, 0
+ ret i8 %res
+}
+
+define i16 @cmpxchg_i16_acquire_acquire_reti16(ptr %ptr, i16 %cmp, i16 %val) nounwind {
+; LA64-LABEL: cmpxchg_i16_acquire_acquire_reti16:
+; LA64: # %bb.0:
+; LA64-NEXT: addi.w $a3, $zero, -4
+; LA64-NEXT: and $a3, $a0, $a3
+; LA64-NEXT: slli.d $a0, $a0, 3
+; LA64-NEXT: lu12i.w $a4, 15
+; LA64-NEXT: ori $a4, $a4, 4095
+; LA64-NEXT: sll.w $a4, $a4, $a0
+; LA64-NEXT: addi.w $a4, $a4, 0
+; LA64-NEXT: bstrpick.d $a2, $a2, 15, 0
+; LA64-NEXT: sll.w $a2, $a2, $a0
+; LA64-NEXT: addi.w $a2, $a2, 0
+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT: sll.w $a1, $a1, $a0
+; LA64-NEXT: addi.w $a1, $a1, 0
+; LA64-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a5, $a3, 0
+; LA64-NEXT: and $a6, $a5, $a4
+; LA64-NEXT: bne $a6, $a1, .LBB5_3
+; LA64-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1
+; LA64-NEXT: dbar 0
+; LA64-NEXT: andn $a6, $a5, $a4
+; LA64-NEXT: or $a6, $a6, $a2
+; LA64-NEXT: sc.w $a6, $a3, 0
+; LA64-NEXT: beqz $a6, .LBB5_1
+; LA64-NEXT: b .LBB5_4
+; LA64-NEXT: .LBB5_3:
+; LA64-NEXT: dbar 1792
+; LA64-NEXT: .LBB5_4:
+; LA64-NEXT: srl.w $a0, $a5, $a0
+; LA64-NEXT: ret
+ %tmp = cmpxchg ptr %ptr, i16 %cmp, i16 %val acquire acquire
+ %res = extractvalue { i16, i1 } %tmp, 0
+ ret i16 %res
+}
+
+define i32 @cmpxchg_i32_acquire_acquire_reti32(ptr %ptr, i32 %cmp, i32 %val) nounwind {
+; LA64-LABEL: cmpxchg_i32_acquire_acquire_reti32:
+; LA64: # %bb.0:
+; LA64-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a3, $a0, 0
+; LA64-NEXT: bne $a3, $a1, .LBB6_3
+; LA64-NEXT: # %bb.2: # in Loop: Header=BB6_1 Depth=1
+; LA64-NEXT: dbar 0
+; LA64-NEXT: move $a4, $a2
+; LA64-NEXT: sc.w $a4, $a0, 0
+; LA64-NEXT: beqz $a4, .LBB6_1
+; LA64-NEXT: b .LBB6_4
+; LA64-NEXT: .LBB6_3:
+; LA64-NEXT: dbar 1792
+; LA64-NEXT: .LBB6_4:
+; LA64-NEXT: move $a0, $a3
+; LA64-NEXT: ret
+ %tmp = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire acquire
+ %res = extractvalue { i32, i1 } %tmp, 0
+ ret i32 %res
+}
+
+define i64 @cmpxchg_i64_acquire_acquire_reti64(ptr %ptr, i64 %cmp, i64 %val) nounwind {
+; LA64-LABEL: cmpxchg_i64_acquire_acquire_reti64:
+; LA64: # %bb.0:
+; LA64-NEXT: .LBB7_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.d $a3, $a0, 0
+; LA64-NEXT: bne $a3, $a1, .LBB7_3
+; LA64-NEXT: # %bb.2: # in Loop: Header=BB7_1 Depth=1
+; LA64-NEXT: dbar 0
+; LA64-NEXT: move $a4, $a2
+; LA64-NEXT: sc.d $a4, $a0, 0
+; LA64-NEXT: beqz $a4, .LBB7_1
+; LA64-NEXT: b .LBB7_4
+; LA64-NEXT: .LBB7_3:
+; LA64-NEXT: dbar 1792
+; LA64-NEXT: .LBB7_4:
+; LA64-NEXT: move $a0, $a3
+; LA64-NEXT: ret
+ %tmp = cmpxchg ptr %ptr, i64 %cmp, i64 %val acquire acquire
+ %res = extractvalue { i64, i1 } %tmp, 0
+ ret i64 %res
+}
+
+define i1 @cmpxchg_i8_acquire_acquire_reti1(ptr %ptr, i8 %cmp, i8 %val) nounwind {
+; LA64-LABEL: cmpxchg_i8_acquire_acquire_reti1:
+; LA64: # %bb.0:
+; LA64-NEXT: addi.w $a3, $zero, -4
+; LA64-NEXT: and $a3, $a0, $a3
+; LA64-NEXT: slli.d $a0, $a0, 3
+; LA64-NEXT: andi $a1, $a1, 255
+; LA64-NEXT: sll.w $a1, $a1, $a0
+; LA64-NEXT: ori $a4, $zero, 255
+; LA64-NEXT: sll.w $a4, $a4, $a0
+; LA64-NEXT: andi $a2, $a2, 255
+; LA64-NEXT: sll.w $a0, $a2, $a0
+; LA64-NEXT: addi.w $a0, $a0, 0
+; LA64-NEXT: addi.w $a2, $a4, 0
+; LA64-NEXT: addi.w $a5, $a1, 0
+; LA64-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a6, $a3, 0
+; LA64-NEXT: and $a7, $a6, $a2
+; LA64-NEXT: bne $a7, $a5, .LBB8_3
+; LA64-NEXT: # %bb.2: # in Loop: Header=BB8_1 Depth=1
+; LA64-NEXT: dbar 0
+; LA64-NEXT: andn $a7, $a6, $a2
+; LA64-NEXT: or $a7, $a7, $a0
+; LA64-NEXT: sc.w $a7, $a3, 0
+; LA64-NEXT: beqz $a7, .LBB8_1
+; LA64-NEXT: b .LBB8_4
+; LA64-NEXT: .LBB8_3:
+; LA64-NEXT: dbar 1792
+; LA64-NEXT: .LBB8_4:
+; LA64-NEXT: and $a0, $a6, $a4
+; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0
+; LA64-NEXT: bstrpick.d $a1, $a1, 31, 0
+; LA64-NEXT: xor $a0, $a1, $a0
+; LA64-NEXT: sltui $a0, $a0, 1
+; LA64-NEXT: ret
+ %tmp = cmpxchg ptr %ptr, i8 %cmp, i8 %val acquire acquire
+ %res = extractvalue { i8, i1 } %tmp, 1
+ ret i1 %res
+}
+
+define i1 @cmpxchg_i16_acquire_acquire_reti1(ptr %ptr, i16 %cmp, i16 %val) nounwind {
+; LA64-LABEL: cmpxchg_i16_acquire_acquire_reti1:
+; LA64: # %bb.0:
+; LA64-NEXT: addi.w $a3, $zero, -4
+; LA64-NEXT: and $a3, $a0, $a3
+; LA64-NEXT: slli.d $a0, $a0, 3
+; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
+; LA64-NEXT: sll.w $a1, $a1, $a0
+; LA64-NEXT: lu12i.w $a4, 15
+; LA64-NEXT: ori $a4, $a4, 4095
+; LA64-NEXT: sll.w $a4, $a4, $a0
+; LA64-NEXT: bstrpick.d $a2, $a2, 15, 0
+; LA64-NEXT: sll.w $a0, $a2, $a0
+; LA64-NEXT: addi.w $a0, $a0, 0
+; LA64-NEXT: addi.w $a2, $a4, 0
+; LA64-NEXT: addi.w $a5, $a1, 0
+; LA64-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a6, $a3, 0
+; LA64-NEXT: and $a7, $a6, $a2
+; LA64-NEXT: bne $a7, $a5, .LBB9_3
+; LA64-NEXT: # %bb.2: # in Loop: Header=BB9_1 Depth=1
+; LA64-NEXT: dbar 0
+; LA64-NEXT: andn $a7, $a6, $a2
+; LA64-NEXT: or $a7, $a7, $a0
+; LA64-NEXT: sc.w $a7, $a3, 0
+; LA64-NEXT: beqz $a7, .LBB9_1
+; LA64-NEXT: b .LBB9_4
+; LA64-NEXT: .LBB9_3:
+; LA64-NEXT: dbar 1792
+; LA64-NEXT: .LBB9_4:
+; LA64-NEXT: and $a0, $a6, $a4
+; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0
+; LA64-NEXT: bstrpick.d $a1, $a1, 31, 0
+; LA64-NEXT: xor $a0, $a1, $a0
+; LA64-NEXT: sltui $a0, $a0, 1
+; LA64-NEXT: ret
+ %tmp = cmpxchg ptr %ptr, i16 %cmp, i16 %val acquire acquire
+ %res = extractvalue { i16, i1 } %tmp, 1
+ ret i1 %res
+}
+
+define i1 @cmpxchg_i32_acquire_acquire_reti1(ptr %ptr, i32 %cmp, i32 %val) nounwind {
+; LA64-LABEL: cmpxchg_i32_acquire_acquire_reti1:
+; LA64: # %bb.0:
+; LA64-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.w $a3, $a0, 0
+; LA64-NEXT: bne $a3, $a1, .LBB10_3
+; LA64-NEXT: # %bb.2: # in Loop: Header=BB10_1 Depth=1
+; LA64-NEXT: dbar 0
+; LA64-NEXT: move $a4, $a2
+; LA64-NEXT: sc.w $a4, $a0, 0
+; LA64-NEXT: beqz $a4, .LBB10_1
+; LA64-NEXT: b .LBB10_4
+; LA64-NEXT: .LBB10_3:
+; LA64-NEXT: dbar 1792
+; LA64-NEXT: .LBB10_4:
+; LA64-NEXT: addi.w $a0, $a1, 0
+; LA64-NEXT: xor $a0, $a3, $a0
+; LA64-NEXT: sltui $a0, $a0, 1
+; LA64-NEXT: ret
+ %tmp = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire acquire
+ %res = extractvalue { i32, i1 } %tmp, 1
+ ret i1 %res
+}
+
+define i1 @cmpxchg_i64_acquire_acquire_reti1(ptr %ptr, i64 %cmp, i64 %val) nounwind {
+; LA64-LABEL: cmpxchg_i64_acquire_acquire_reti1:
+; LA64: # %bb.0:
+; LA64-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1
+; LA64-NEXT: ll.d $a3, $a0, 0
+; LA64-NEXT: bne $a3, $a1, .LBB11_3
+; LA64-NEXT: # %bb.2: # in Loop: Header=BB11_1 Depth=1
+; LA64-NEXT: dbar 0
+; LA64-NEXT: move $a4, $a2
+; LA64-NEXT: sc.d $a4, $a0, 0
+; LA64-NEXT: beqz $a4, .LBB11_1
+; LA64-NEXT: b .LBB11_4
+; LA64-NEXT: .LBB11_3:
+; LA64-NEXT: dbar 1792
+; LA64-NEXT: .LBB11_4:
+; LA64-NEXT: xor $a0, $a3, $a1
+; LA64-NEXT: sltui $a0, $a0, 1
+; LA64-NEXT: ret
+ %tmp = cmpxchg ptr %ptr, i64 %cmp, i64 %val acquire acquire
+ %res = extractvalue { i64, i1 } %tmp, 1
+ ret i1 %res
+}