[llvm] [RISCV] Initial ISel support for the experimental zacas extension (PR #67918)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Oct 1 07:45:46 PDT 2023
llvmbot wrote:
@llvm/pr-subscribers-backend-risc-v
<details>
<summary>Changes</summary>
This patch implements initial instruction selection support for the v1.0-rc3 draft of the Zacas extension, following the approach taken by AArch64.
It introduces two register classes (GPRPI64/GPRPI128) and a set of pseudo instructions so that the operands of paired `amocas` instructions are allocated to valid even/odd register pairs.
These pseudo instructions are expanded by the `RISCVExpandAtomicPseudo` pass in `RISCVExpandAtomicPseudoInsts.cpp`.
Migrated from https://reviews.llvm.org/D158956.
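As a quick illustration of what this enables, here is a hedged IR sketch (the function name and run line are illustrative assumptions, not taken from the patch's tests): on RV32, a 2*XLen (i64) cmpxchg that previously had to go through a libcall can now be selected natively as a paired `amocas.d`.

```llvm
; Illustrative run line (not from the patch's test files):
;   llc -mtriple=riscv32 -mattr=+a,+experimental-zacas -verify-machineinstrs
define i64 @cas_i64(ptr %p, i64 %cmp, i64 %new) nounwind {
  ; With Zacas, this should select PseudoAMOCAS_D_32 and later expand to
  ; amocas.d.aqrl; without it, RV32 falls back to an atomic libcall.
  %pair = cmpxchg ptr %p, i64 %cmp, i64 %new seq_cst seq_cst
  %old = extractvalue { i64, i1 } %pair, 0
  ret i64 %old
}
```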
---
Patch is 196.64 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/67918.diff
7 Files Affected:
- (modified) llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp (+145)
- (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+70-1)
- (modified) llvm/lib/Target/RISCV/RISCVInstrInfoA.td (+22)
- (modified) llvm/lib/Target/RISCV/RISCVRegisterInfo.td (+20)
- (modified) llvm/test/CodeGen/RISCV/atomic-cmpxchg-branch-on-result.ll (+305-57)
- (modified) llvm/test/CodeGen/RISCV/atomic-cmpxchg-flag.ll (+10)
- (modified) llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll (+3435-285)
``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp
index bb772fc5da92244..43abdc2b06f8af4 100644
--- a/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp
+++ b/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp
@@ -59,6 +59,9 @@ class RISCVExpandAtomicPseudo : public MachineFunctionPass {
bool expandAtomicCmpXchg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, bool IsMasked,
int Width, MachineBasicBlock::iterator &NextMBBI);
+ bool expandAMOCAS(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ bool IsPaired, int Width,
+ MachineBasicBlock::iterator &NextMBBI);
#ifndef NDEBUG
unsigned getInstSizeInBytes(const MachineFunction &MF) const {
unsigned Size = 0;
@@ -145,6 +148,14 @@ bool RISCVExpandAtomicPseudo::expandMI(MachineBasicBlock &MBB,
return expandAtomicCmpXchg(MBB, MBBI, false, 64, NextMBBI);
case RISCV::PseudoMaskedCmpXchg32:
return expandAtomicCmpXchg(MBB, MBBI, true, 32, NextMBBI);
+ case RISCV::PseudoAMOCAS_W:
+ return expandAMOCAS(MBB, MBBI, false, 32, NextMBBI);
+ case RISCV::PseudoAMOCAS_D_64:
+ return expandAMOCAS(MBB, MBBI, false, 64, NextMBBI);
+ case RISCV::PseudoAMOCAS_D_32:
+ return expandAMOCAS(MBB, MBBI, true, 64, NextMBBI);
+ case RISCV::PseudoAMOCAS_Q:
+ return expandAMOCAS(MBB, MBBI, true, 128, NextMBBI);
}
return false;
@@ -256,6 +267,74 @@ static unsigned getSCForRMW(AtomicOrdering Ordering, int Width,
llvm_unreachable("Unexpected SC width\n");
}
+static unsigned getAMOCASForRMW32(AtomicOrdering Ordering,
+ const RISCVSubtarget *Subtarget) {
+ if (Subtarget->hasStdExtZtso())
+ return RISCV::AMOCAS_W;
+ switch (Ordering) {
+ default:
+ llvm_unreachable("Unexpected AtomicOrdering");
+ case AtomicOrdering::Monotonic:
+ return RISCV::AMOCAS_W;
+ case AtomicOrdering::Acquire:
+ return RISCV::AMOCAS_W_AQ;
+ case AtomicOrdering::Release:
+ return RISCV::AMOCAS_W_RL;
+ case AtomicOrdering::AcquireRelease:
+ case AtomicOrdering::SequentiallyConsistent:
+ return RISCV::AMOCAS_W_AQ_RL;
+ }
+}
+
+static unsigned getAMOCASForRMW64(AtomicOrdering Ordering,
+ const RISCVSubtarget *Subtarget) {
+ if (Subtarget->hasStdExtZtso())
+ return RISCV::AMOCAS_D;
+ switch (Ordering) {
+ default:
+ llvm_unreachable("Unexpected AtomicOrdering");
+ case AtomicOrdering::Monotonic:
+ return RISCV::AMOCAS_D;
+ case AtomicOrdering::Acquire:
+ return RISCV::AMOCAS_D_AQ;
+ case AtomicOrdering::Release:
+ return RISCV::AMOCAS_D_RL;
+ case AtomicOrdering::AcquireRelease:
+ case AtomicOrdering::SequentiallyConsistent:
+ return RISCV::AMOCAS_D_AQ_RL;
+ }
+}
+
+static unsigned getAMOCASForRMW128(AtomicOrdering Ordering,
+ const RISCVSubtarget *Subtarget) {
+ if (Subtarget->hasStdExtZtso())
+ return RISCV::AMOCAS_Q;
+ switch (Ordering) {
+ default:
+ llvm_unreachable("Unexpected AtomicOrdering");
+ case AtomicOrdering::Monotonic:
+ return RISCV::AMOCAS_Q;
+ case AtomicOrdering::Acquire:
+ return RISCV::AMOCAS_Q_AQ;
+ case AtomicOrdering::Release:
+ return RISCV::AMOCAS_Q_RL;
+ case AtomicOrdering::AcquireRelease:
+ case AtomicOrdering::SequentiallyConsistent:
+ return RISCV::AMOCAS_Q_AQ_RL;
+ }
+}
+
+static unsigned getAMOCASForRMW(AtomicOrdering Ordering, int Width,
+ const RISCVSubtarget *Subtarget) {
+ if (Width == 32)
+ return getAMOCASForRMW32(Ordering, Subtarget);
+ if (Width == 64)
+ return getAMOCASForRMW64(Ordering, Subtarget);
+ if (Width == 128)
+ return getAMOCASForRMW128(Ordering, Subtarget);
+ llvm_unreachable("Unexpected AMOCAS width\n");
+}
+
static void doAtomicBinOpExpansion(const RISCVInstrInfo *TII, MachineInstr &MI,
DebugLoc DL, MachineBasicBlock *ThisMBB,
MachineBasicBlock *LoopMBB,
@@ -728,6 +807,72 @@ bool RISCVExpandAtomicPseudo::expandAtomicCmpXchg(
return true;
}
+static Register getGPRPairEvenReg(Register PairedReg) {
+ switch (PairedReg) {
+ case RISCV::X0_PD:
+ return RISCV::X0;
+ case RISCV::X2_PD:
+ return RISCV::X2;
+ case RISCV::X4_PD:
+ return RISCV::X4;
+ case RISCV::X6_PD:
+ return RISCV::X6;
+ case RISCV::X8_PD:
+ return RISCV::X8;
+ case RISCV::X10_PD:
+ return RISCV::X10;
+ case RISCV::X12_PD:
+ return RISCV::X12;
+ case RISCV::X14_PD:
+ return RISCV::X14;
+ case RISCV::X16_PD:
+ return RISCV::X16;
+ case RISCV::X18_PD:
+ return RISCV::X18;
+ case RISCV::X20_PD:
+ return RISCV::X20;
+ case RISCV::X22_PD:
+ return RISCV::X22;
+ case RISCV::X24_PD:
+ return RISCV::X24;
+ case RISCV::X26_PD:
+ return RISCV::X26;
+ case RISCV::X28_PD:
+ return RISCV::X28;
+ case RISCV::X30_PD:
+ return RISCV::X30;
+ default:
+ llvm_unreachable("Unexpected GPR pair");
+ }
+}
+
+bool RISCVExpandAtomicPseudo::expandAMOCAS(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, bool IsPaired,
+ int Width, MachineBasicBlock::iterator &NextMBBI) {
+ MachineInstr &MI = *MBBI;
+ DebugLoc DL = MI.getDebugLoc();
+
+ Register DestReg = MI.getOperand(0).getReg();
+ if (IsPaired)
+ DestReg = getGPRPairEvenReg(DestReg);
+ Register AddrReg = MI.getOperand(1).getReg();
+ Register NewValReg = MI.getOperand(3).getReg();
+ if (IsPaired)
+ NewValReg = getGPRPairEvenReg(NewValReg);
+ AtomicOrdering Ordering =
+ static_cast<AtomicOrdering>(MI.getOperand(4).getImm());
+
+ MachineInstr *NewMI =
+ BuildMI(MBB, MBBI, DL, TII->get(getAMOCASForRMW(Ordering, Width, STI)))
+ .addReg(DestReg)
+ .addReg(AddrReg)
+ .addReg(NewValReg);
+ NewMI->getOperand(0).setIsDef(true);
+
+ MI.eraseFromParent();
+ return true;
+}
+
} // end of anonymous namespace
INITIALIZE_PASS(RISCVExpandAtomicPseudo, "riscv-expand-atomic-pseudo",
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 5e3975df1c4425d..de640deda82e92b 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -552,7 +552,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
}
if (Subtarget.hasStdExtA()) {
- setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
+ if (Subtarget.hasStdExtZacas())
+ setMaxAtomicSizeInBitsSupported(Subtarget.getXLen() * 2);
+ else
+ setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
setMinCmpXchgSizeInBits(32);
} else if (Subtarget.hasForcedAtomics()) {
setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
@@ -1249,6 +1252,12 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
XLenVT, LibCall);
}
+ // Set atomic_cmp_swap operations to expand to AMOCAS.D (RV32) and AMOCAS.Q
+ // (RV64).
+ if (Subtarget.hasStdExtZacas())
+ setOperationAction(ISD::ATOMIC_CMP_SWAP,
+ Subtarget.is64Bit() ? MVT::i128 : MVT::i64, Custom);
+
if (Subtarget.hasVendorXTHeadMemIdx()) {
for (unsigned im = (unsigned)ISD::PRE_INC; im != (unsigned)ISD::POST_DEC;
++im) {
@@ -10451,6 +10460,63 @@ static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
}
+// Create an even/odd pair of X registers holding integer value V.
+static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V, MVT VT,
+ MVT SubRegVT) {
+ SDLoc DL(V.getNode());
+ SDValue VLo = DAG.getAnyExtOrTrunc(V, DL, SubRegVT);
+ SDValue VHi = DAG.getAnyExtOrTrunc(
+ DAG.getNode(
+ ISD::SRL, DL, VT, V,
+ DAG.getConstant(SubRegVT == MVT::i64 ? 64 : 32, DL, SubRegVT)),
+ DL, SubRegVT);
+ SDValue RegClass = DAG.getTargetConstant(
+ VT == MVT::i128 ? RISCV::GPRPI128RegClassID : RISCV::GPRPI64RegClassID,
+ DL, MVT::i32);
+ SDValue SubReg0 = DAG.getTargetConstant(RISCV::sub_32, DL, MVT::i32);
+ SDValue SubReg1 = DAG.getTargetConstant(RISCV::sub_32_hi, DL, MVT::i32);
+ const SDValue Ops[] = {RegClass, VLo, SubReg0, VHi, SubReg1};
+ return SDValue(
+ DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops), 0);
+}
+
+static void ReplaceCMP_SWAP_2XLenResults(SDNode *N,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ MVT VT = N->getSimpleValueType(0);
+ assert(N->getValueType(0) == (Subtarget.is64Bit() ? MVT::i128 : MVT::i64) &&
+ "AtomicCmpSwap on types less than 2*XLen should be legal");
+ assert(Subtarget.hasStdExtZacas());
+ MVT SubRegVT = (VT == MVT::i64 ? MVT::i32 : MVT::i64);
+
+ SDLoc DL(N);
+ MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
+ AtomicOrdering Ordering = MemOp->getMergedOrdering();
+ SDValue Ops[] = {
+ N->getOperand(1), // Ptr
+ createGPRPairNode(DAG, N->getOperand(2), VT, SubRegVT), // Compare value
+ createGPRPairNode(DAG, N->getOperand(3), VT, SubRegVT), // Store value
+ DAG.getTargetConstant(static_cast<unsigned>(Ordering), DL,
+ MVT::i32), // Ordering
+ N->getOperand(0), // Chain in
+ };
+
+ unsigned Opcode =
+ (VT == MVT::i64 ? RISCV::PseudoAMOCAS_D_32 : RISCV::PseudoAMOCAS_Q);
+ MachineSDNode *CmpSwap = DAG.getMachineNode(
+ Opcode, DL, DAG.getVTList(MVT::Untyped, MVT::Other), Ops);
+ DAG.setNodeMemRefs(CmpSwap, {MemOp});
+
+ unsigned SubReg1 = RISCV::sub_32, SubReg2 = RISCV::sub_32_hi;
+ SDValue Lo =
+ DAG.getTargetExtractSubreg(SubReg1, DL, SubRegVT, SDValue(CmpSwap, 0));
+ SDValue Hi =
+ DAG.getTargetExtractSubreg(SubReg2, DL, SubRegVT, SDValue(CmpSwap, 0));
+ Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, VT, Lo, Hi));
+ Results.push_back(SDValue(CmpSwap, 1));
+}
+
void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const {
@@ -10458,6 +10524,9 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
switch (N->getOpcode()) {
default:
llvm_unreachable("Don't know how to custom type legalize this operation!");
+ case ISD::ATOMIC_CMP_SWAP:
+ ReplaceCMP_SWAP_2XLenResults(N, Results, DAG, Subtarget);
+ break;
case ISD::STRICT_FP_TO_SINT:
case ISD::STRICT_FP_TO_UINT:
case ISD::FP_TO_SINT:
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
index c43af14bb7f7005..d175ae49f1919ce 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
@@ -295,6 +295,28 @@ multiclass PseudoCmpXchgPat<string Op, Pseudo CmpXchgInst,
(CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 7)>;
}
+let Predicates = [HasStdExtZacas] in {
+class PseudoAMOCAS<RegisterClass RC = GPR>
+ : Pseudo<(outs RC:$res),
+ (ins GPR:$addr, RC:$cmpval, RC:$newval, ixlenimm:$ordering), []> {
+ let Constraints = "@earlyclobber $res, $res = $cmpval";
+ let mayLoad = 1;
+ let mayStore = 1;
+ let hasSideEffects = 0;
+}
+def PseudoAMOCAS_W: PseudoAMOCAS;
+defm : PseudoCmpXchgPat<"atomic_cmp_swap_32", PseudoAMOCAS_W>;
+
+let Predicates = [HasStdExtZacas, IsRV32] in {
+ def PseudoAMOCAS_D_32: PseudoAMOCAS<GPRPI64>;
+}
+let Predicates = [HasStdExtZacas, IsRV64] in {
+ def PseudoAMOCAS_D_64: PseudoAMOCAS;
+ defm : PseudoCmpXchgPat<"atomic_cmp_swap_64", PseudoAMOCAS_D_64>;
+ def PseudoAMOCAS_Q: PseudoAMOCAS<GPRPI128>;
+}
+}
+
def PseudoCmpXchg32 : PseudoCmpXchg;
defm : PseudoCmpXchgPat<"atomic_cmp_swap_32", PseudoCmpXchg32>;
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
index ab0d354967b34c7..9c3c8b85782f613 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
@@ -578,6 +578,26 @@ def GPRPF64 : RegisterClass<"RISCV", [f64], 64, (add
X0_PD, X2_PD, X4_PD
)>;
+let RegInfos = RegInfoByHwMode<[RV32], [RegInfo<32, 32, 32>]> in
+def GPRPI64 : RegisterClass<"RISCV", [i64], 32, (add
+ X10_PD, X12_PD, X14_PD, X16_PD,
+ X6_PD,
+ X28_PD, X30_PD,
+ X8_PD,
+ X18_PD, X20_PD, X22_PD, X24_PD, X26_PD,
+ X0_PD, X2_PD, X4_PD
+)>;
+
+let RegInfos = RegInfoByHwMode<[RV64], [RegInfo<64, 64, 64>]> in
+def GPRPI128 : RegisterClass<"RISCV", [i128], 64, (add
+ X10_PD, X12_PD, X14_PD, X16_PD,
+ X6_PD,
+ X28_PD, X30_PD,
+ X8_PD,
+ X18_PD, X20_PD, X22_PD, X24_PD, X26_PD,
+ X0_PD, X2_PD, X4_PD
+)>;
+
// The register class is added for inline assembly for vector mask types.
def VM : VReg<VMaskVTs,
(add (sequence "V%u", 8, 31),
diff --git a/llvm/test/CodeGen/RISCV/atomic-cmpxchg-branch-on-result.ll b/llvm/test/CodeGen/RISCV/atomic-cmpxchg-branch-on-result.ll
index 651f58d324422f2..08c9aa685479dfa 100644
--- a/llvm/test/CodeGen/RISCV/atomic-cmpxchg-branch-on-result.ll
+++ b/llvm/test/CodeGen/RISCV/atomic-cmpxchg-branch-on-result.ll
@@ -1,30 +1,72 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+a -verify-machineinstrs < %s \
-; RUN: | FileCheck -check-prefixes=CHECK,RV32IA %s
+; RUN: | FileCheck -check-prefixes=RV32IA %s
+; RUN: llc -mtriple=riscv32 -mattr=+a,+experimental-zacas -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV32IA-ZACAS %s
; RUN: llc -mtriple=riscv64 -mattr=+a -verify-machineinstrs < %s \
-; RUN: | FileCheck -check-prefixes=CHECK,RV64IA %s
+; RUN: | FileCheck -check-prefixes=RV64IA %s
+; RUN: llc -mtriple=riscv64 -mattr=+a,+experimental-zacas -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV64IA-ZACAS %s
; Test cmpxchg followed by a branch on the cmpxchg success value to see if the
; branch is folded into the cmpxchg expansion.
define void @cmpxchg_and_branch1(ptr %ptr, i32 signext %cmp, i32 signext %val) nounwind {
-; CHECK-LABEL: cmpxchg_and_branch1:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: .LBB0_1: # %do_cmpxchg
-; CHECK-NEXT: # =>This Loop Header: Depth=1
-; CHECK-NEXT: # Child Loop BB0_3 Depth 2
-; CHECK-NEXT: .LBB0_3: # %do_cmpxchg
-; CHECK-NEXT: # Parent Loop BB0_1 Depth=1
-; CHECK-NEXT: # => This Inner Loop Header: Depth=2
-; CHECK-NEXT: lr.w.aqrl a3, (a0)
-; CHECK-NEXT: bne a3, a1, .LBB0_1
-; CHECK-NEXT: # %bb.4: # %do_cmpxchg
-; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=2
-; CHECK-NEXT: sc.w.rl a4, a2, (a0)
-; CHECK-NEXT: bnez a4, .LBB0_3
-; CHECK-NEXT: # %bb.5: # %do_cmpxchg
-; CHECK-NEXT: # %bb.2: # %exit
-; CHECK-NEXT: ret
+; RV32IA-LABEL: cmpxchg_and_branch1:
+; RV32IA: # %bb.0: # %entry
+; RV32IA-NEXT: .LBB0_1: # %do_cmpxchg
+; RV32IA-NEXT: # =>This Loop Header: Depth=1
+; RV32IA-NEXT: # Child Loop BB0_3 Depth 2
+; RV32IA-NEXT: .LBB0_3: # %do_cmpxchg
+; RV32IA-NEXT: # Parent Loop BB0_1 Depth=1
+; RV32IA-NEXT: # => This Inner Loop Header: Depth=2
+; RV32IA-NEXT: lr.w.aqrl a3, (a0)
+; RV32IA-NEXT: bne a3, a1, .LBB0_1
+; RV32IA-NEXT: # %bb.4: # %do_cmpxchg
+; RV32IA-NEXT: # in Loop: Header=BB0_3 Depth=2
+; RV32IA-NEXT: sc.w.rl a4, a2, (a0)
+; RV32IA-NEXT: bnez a4, .LBB0_3
+; RV32IA-NEXT: # %bb.5: # %do_cmpxchg
+; RV32IA-NEXT: # %bb.2: # %exit
+; RV32IA-NEXT: ret
+;
+; RV32IA-ZACAS-LABEL: cmpxchg_and_branch1:
+; RV32IA-ZACAS: # %bb.0: # %entry
+; RV32IA-ZACAS-NEXT: .LBB0_1: # %do_cmpxchg
+; RV32IA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: mv a3, a1
+; RV32IA-ZACAS-NEXT: amocas.w.aqrl a3, a2, (a0)
+; RV32IA-ZACAS-NEXT: bne a3, a1, .LBB0_1
+; RV32IA-ZACAS-NEXT: # %bb.2: # %exit
+; RV32IA-ZACAS-NEXT: ret
+;
+; RV64IA-LABEL: cmpxchg_and_branch1:
+; RV64IA: # %bb.0: # %entry
+; RV64IA-NEXT: .LBB0_1: # %do_cmpxchg
+; RV64IA-NEXT: # =>This Loop Header: Depth=1
+; RV64IA-NEXT: # Child Loop BB0_3 Depth 2
+; RV64IA-NEXT: .LBB0_3: # %do_cmpxchg
+; RV64IA-NEXT: # Parent Loop BB0_1 Depth=1
+; RV64IA-NEXT: # => This Inner Loop Header: Depth=2
+; RV64IA-NEXT: lr.w.aqrl a3, (a0)
+; RV64IA-NEXT: bne a3, a1, .LBB0_1
+; RV64IA-NEXT: # %bb.4: # %do_cmpxchg
+; RV64IA-NEXT: # in Loop: Header=BB0_3 Depth=2
+; RV64IA-NEXT: sc.w.rl a4, a2, (a0)
+; RV64IA-NEXT: bnez a4, .LBB0_3
+; RV64IA-NEXT: # %bb.5: # %do_cmpxchg
+; RV64IA-NEXT: # %bb.2: # %exit
+; RV64IA-NEXT: ret
+;
+; RV64IA-ZACAS-LABEL: cmpxchg_and_branch1:
+; RV64IA-ZACAS: # %bb.0: # %entry
+; RV64IA-ZACAS-NEXT: .LBB0_1: # %do_cmpxchg
+; RV64IA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64IA-ZACAS-NEXT: mv a3, a1
+; RV64IA-ZACAS-NEXT: amocas.w.aqrl a3, a2, (a0)
+; RV64IA-ZACAS-NEXT: bne a3, a1, .LBB0_1
+; RV64IA-ZACAS-NEXT: # %bb.2: # %exit
+; RV64IA-ZACAS-NEXT: ret
entry:
br label %do_cmpxchg
do_cmpxchg:
@@ -36,25 +78,65 @@ exit:
}
define void @cmpxchg_and_branch2(ptr %ptr, i32 signext %cmp, i32 signext %val) nounwind {
-; CHECK-LABEL: cmpxchg_and_branch2:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: .LBB1_1: # %do_cmpxchg
-; CHECK-NEXT: # =>This Loop Header: Depth=1
-; CHECK-NEXT: # Child Loop BB1_3 Depth 2
-; CHECK-NEXT: .LBB1_3: # %do_cmpxchg
-; CHECK-NEXT: # Parent Loop BB1_1 Depth=1
-; CHECK-NEXT: # => This Inner Loop Header: Depth=2
-; CHECK-NEXT: lr.w.aqrl a3, (a0)
-; CHECK-NEXT: bne a3, a1, .LBB1_5
-; CHECK-NEXT: # %bb.4: # %do_cmpxchg
-; CHECK-NEXT: # in Loop: Header=BB1_3 Depth=2
-; CHECK-NEXT: sc.w.rl a4, a2, (a0)
-; CHECK-NEXT: bnez a4, .LBB1_3
-; CHECK-NEXT: .LBB1_5: # %do_cmpxchg
-; CHECK-NEXT: # in Loop: Header=BB1_1 Depth=1
-; CHECK-NEXT: beq a3, a1, .LBB1_1
-; CHECK-NEXT: # %bb.2: # %exit
-; CHECK-NEXT: ret
+; RV32IA-LABEL: cmpxchg_and_branch2:
+; RV32IA: # %bb.0: # %entry
+; RV32IA-NEXT: .LBB1_1: # %do_cmpxchg
+; RV32IA-NEXT: # =>This Loop Header: Depth=1
+; RV32IA-NEXT: # Child Loop BB1_3 Depth 2
+; RV32IA-NEXT: .LBB1_3: # %do_cmpxchg
+; RV32IA-NEXT: # Parent Loop BB1_1 Depth=1
+; RV32IA-NEXT: # => This Inner Loop Header: Depth=2
+; RV32IA-NEXT: lr.w.aqrl a3, (a0)
+; RV32IA-NEXT: bne a3, a1, .LBB1_5
+; RV32IA-NEXT: # %bb.4: # %do_cmpxchg
+; RV32IA-NEXT: # in Loop: Header=BB1_3 Depth=2
+; RV32IA-NEXT: sc.w.rl a4, a2, (a0)
+; RV32IA-NEXT: bnez a4, .LBB1_3
+; RV32IA-NEXT: .LBB1_5: # %do_cmpxchg
+; RV32IA-NEXT: # in Loop: Header=BB1_1 Depth=1
+; RV32IA-NEXT: beq a3, a1, .LBB1_1
+; RV32IA-NEXT: # %bb.2: # %exit
+; RV32IA-NEXT: ret
+;
+; RV32IA-ZACAS-LABEL: cmpxchg_and_branch2:
+; RV32IA-ZACAS: # %bb.0: # %entry
+; RV32IA-ZACAS-NEXT: .LBB1_1: # %do_cmpxchg
+; RV32IA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: mv a3, a1
+; RV32IA-ZACAS-NEXT: amocas.w.aqrl a3, a2, (a0)
+; RV32IA-ZACAS-NEXT: beq a3, a1, .LBB1_1
+; RV32IA-ZACAS-NEXT: # %bb.2: # %exit
+; RV32IA-ZACAS-NEXT: ret
+;
+; RV64IA-LABEL: cmpxchg_and_branch2:
+; RV64IA: # %bb.0: # %entry
+; RV64IA-NEXT: .LBB1_1: # %do_cmpxchg
+; RV64IA-NEXT: # =>This Loop Header: Depth=1
+; RV64IA-NEXT: # Child Loop BB1_3 Depth 2
+; RV64IA-NEXT: .LBB1_3: # %do_cmpxchg
+; RV64IA-NEXT: # Parent Loop BB1_1 Depth=1
+; RV64IA-NEXT: # => This Inner Loop Header: Depth=2
+; RV64IA-NEXT: lr.w.aqrl a3, (a0)
+; RV64IA-NEXT: bne a3, a1, .LBB1_5
+; RV64IA-NEXT: # %bb.4: # %do_cmpxchg
+; RV64IA-NEXT: # in Loop: Header=BB1_3 Depth=2
+; RV64IA-NEXT: sc.w.rl a4, a2, (a0)
+; RV64IA-NEXT: bnez a4, .LBB1_3
+; RV64IA-NEXT: .LBB1_5: # %do_cmpxchg
+; RV64IA-NEXT: # in Loop: Header=BB1_1 Depth=1
+; RV64IA-NEXT: beq a3, a1, .LBB1_1
+; RV64IA-NEXT: # %bb.2: # %exit
+; RV64IA-NEXT: ret
+;
+; RV64IA-ZACAS-LABEL: cmpxchg_and_branch2:
+; RV64IA-ZACAS: # %bb.0: # %entry
+; RV64IA-ZACAS-NEXT: .LBB1_1: # %do_cmpxchg
+; RV64IA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64IA-ZACAS-NEXT: mv a3, a1
+; RV64IA-ZACAS-NEXT: amocas.w.aqrl a3, a2, (a0)
+; RV64IA-ZACAS-NEXT: beq a3, a1, .LBB1_1
+; RV64IA-ZACAS-NEXT: # %bb.2: # %exit
+; RV64IA-ZACAS-NEXT: ret
entry:
...
[truncated]
``````````
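The `atomic-cmpxchg-branch-on-result.ll` changes above show the payoff of the new lowering: the nested lr.w/sc.w loop collapses into a single `amocas.w` retry loop. For reference, here is a minimal IR reduction of the pattern that test exercises (the function name is hypothetical; the real checks are autogenerated by `update_llc_test_checks.py`):

```llvm
define void @spin_until_swap(ptr %ptr, i32 signext %cmp, i32 signext %val) nounwind {
entry:
  br label %do_cmpxchg
do_cmpxchg:
  ; Branch on the cmpxchg success flag; with Zacas the inner lr/sc loop
  ; disappears and only this outer retry loop remains.
  %pair = cmpxchg ptr %ptr, i32 %cmp, i32 %val seq_cst seq_cst
  %ok = extractvalue { i32, i1 } %pair, 1
  br i1 %ok, label %exit, label %do_cmpxchg
exit:
  ret void
}
```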
</details>
https://github.com/llvm/llvm-project/pull/67918
More information about the llvm-commits mailing list