[llvm] 5eaebd5 - [PowerPC] Implement quadword atomic load/store
Kai Luo via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 31 23:55:44 PDT 2021
Author: Kai Luo
Date: 2021-09-01T06:55:40Z
New Revision: 5eaebd5d649ad7209ddef37ea7118e262f755a14
URL: https://github.com/llvm/llvm-project/commit/5eaebd5d649ad7209ddef37ea7118e262f755a14
DIFF: https://github.com/llvm/llvm-project/commit/5eaebd5d649ad7209ddef37ea7118e262f755a14.diff
LOG: [PowerPC] Implement quadword atomic load/store
Add support for loading and storing i128 atomically.
Reviewed By: jsji
Differential Revision: https://reviews.llvm.org/D105612
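With this change, IR like the following (a minimal sketch mirroring the
added tests; the function name is illustrative) is selected to the native
lq/stq sequences shown in the new test file rather than being expanded to
a libcall:

    define i128 @ldst(i128* %src, i128* %dst) {
      %v = load atomic i128, i128* %src seq_cst, align 16
      store atomic i128 %v, i128* %dst seq_cst, align 16
      ret i128 %v
    }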
Added:
llvm/test/CodeGen/PowerPC/atomics-i128-ldst.ll
Modified:
llvm/include/llvm/IR/IntrinsicsPowerPC.td
llvm/lib/Target/PowerPC/PPCExpandAtomicPseudoInsts.cpp
llvm/lib/Target/PowerPC/PPCISelLowering.cpp
llvm/lib/Target/PowerPC/PPCISelLowering.h
llvm/lib/Target/PowerPC/PPCInstr64Bit.td
llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
index f39ec573c6332..b1d1009e4adfd 100644
--- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -1741,4 +1741,11 @@ let TargetPrefix = "ppc" in {
llvm_i64_ty, llvm_i64_ty,
llvm_i64_ty, llvm_i64_ty],
[IntrArgMemOnly, NoCapture<ArgIndex<0>>]>;
+ def int_ppc_atomic_load_i128 :
+ Intrinsic<[llvm_i64_ty, llvm_i64_ty],
+ [llvm_ptr_ty],
+ [IntrArgMemOnly, IntrReadMem, NoCapture<ArgIndex<0>>]>;
+ def int_ppc_atomic_store_i128 :
+ Intrinsic<[], [llvm_i64_ty, llvm_i64_ty, llvm_ptr_ty],
+ [IntrArgMemOnly, IntrWriteMem, NoCapture<ArgIndex<2>>]>;
}
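Viewed from IR, the two intrinsics declared above have the following
signatures (a sketch inferred from the TableGen definitions: the load
returns the low and high doublewords as a pair, and the store takes the
low half, the high half, then the pointer):

    declare { i64, i64 } @llvm.ppc.atomic.load.i128(i8*)
    declare void @llvm.ppc.atomic.store.i128(i64, i64, i8*)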
diff --git a/llvm/lib/Target/PowerPC/PPCExpandAtomicPseudoInsts.cpp b/llvm/lib/Target/PowerPC/PPCExpandAtomicPseudoInsts.cpp
index 9daef26ede470..be4c9dd60b007 100644
--- a/llvm/lib/Target/PowerPC/PPCExpandAtomicPseudoInsts.cpp
+++ b/llvm/lib/Target/PowerPC/PPCExpandAtomicPseudoInsts.cpp
@@ -102,6 +102,16 @@ bool PPCExpandAtomicPseudo::expandMI(MachineBasicBlock &MBB, MachineInstr &MI,
return expandAtomicRMW128(MBB, MI, NMBBI);
case PPC::ATOMIC_CMP_SWAP_I128:
return expandAtomicCmpSwap128(MBB, MI, NMBBI);
+ case PPC::BUILD_QUADWORD: {
+ Register Dst = MI.getOperand(0).getReg();
+ Register DstHi = TRI->getSubReg(Dst, PPC::sub_gp8_x0);
+ Register DstLo = TRI->getSubReg(Dst, PPC::sub_gp8_x1);
+ Register Lo = MI.getOperand(1).getReg();
+ Register Hi = MI.getOperand(2).getReg();
+ PairedCopy(TII, MBB, MI, MI.getDebugLoc(), DstHi, DstLo, Hi, Lo);
+ MI.eraseFromParent();
+ return true;
+ }
default:
return false;
}
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 338df538a95b9..f334347900298 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1286,8 +1286,12 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
}
- if (EnableQuadwordAtomics && Subtarget.hasQuadwordAtomics())
+ if (EnableQuadwordAtomics && Subtarget.hasQuadwordAtomics()) {
setMaxAtomicSizeInBitsSupported(128);
+ setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom);
+ setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom);
+ setOperationAction(ISD::INTRINSIC_VOID, MVT::i128, Custom);
+ }
setBooleanContents(ZeroOrOneBooleanContent);
@@ -1518,6 +1522,7 @@ void PPCTargetLowering::initializeAddrModeMap() {
PPC::MOF_NotAddNorCst | PPC::MOF_Vector256 | PPC::MOF_SubtargetP10,
PPC::MOF_AddrIsSImm32 | PPC::MOF_Vector256 | PPC::MOF_SubtargetP10,
};
+ // TODO: Add mapping for quadword load/store.
}
/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
@@ -10452,11 +10457,18 @@ SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
case Intrinsic::ppc_cfence: {
assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
assert(Subtarget.isPPC64() && "Only 64-bit is supported for now.");
- return SDValue(DAG.getMachineNode(PPC::CFENCE8, DL, MVT::Other,
- DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
- Op.getOperand(ArgStart + 1)),
- Op.getOperand(0)),
- 0);
+ SDValue Val = Op.getOperand(ArgStart + 1);
+ EVT Ty = Val.getValueType();
+ if (Ty == MVT::i128) {
+ // FIXME: Is testing one of the two paired registers sufficient to
+ // guarantee ordering?
+ Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, Val);
+ }
+ return SDValue(
+ DAG.getMachineNode(PPC::CFENCE8, DL, MVT::Other,
+ DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Val),
+ Op.getOperand(0)),
+ 0);
}
default:
break;
@@ -10519,6 +10531,59 @@ SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);
}
+SDValue PPCTargetLowering::LowerATOMIC_LOAD_STORE(SDValue Op,
+ SelectionDAG &DAG) const {
+ AtomicSDNode *N = cast<AtomicSDNode>(Op.getNode());
+ EVT MemVT = N->getMemoryVT();
+ MVT VT = MemVT.getSimpleVT();
+ assert(VT == MVT::i128 && "Expect quadword atomic operations");
+ SDLoc dl(N);
+ unsigned Opc = N->getOpcode();
+ switch (Opc) {
+ case ISD::ATOMIC_LOAD: {
+ // Lower a quadword atomic load to int_ppc_atomic_load_i128, which is then
+ // lowered to PPC instructions by the pattern-matching instruction selector.
+ SDVTList Tys = DAG.getVTList(MVT::i64, MVT::i64, MVT::Other);
+ SmallVector<SDValue, 4> Ops{
+ N->getOperand(0),
+ DAG.getConstant(Intrinsic::ppc_atomic_load_i128, dl, MVT::i32)};
+ for (int I = 1, E = N->getNumOperands(); I < E; ++I)
+ Ops.push_back(N->getOperand(I));
+ SDValue LoadedVal = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, Tys,
+ Ops, MemVT, N->getMemOperand());
+ SDValue ValLo = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal);
+ SDValue ValHi =
+ DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal.getValue(1));
+ ValHi = DAG.getNode(ISD::SHL, dl, MVT::i128, ValHi,
+ DAG.getConstant(64, dl, MVT::i32));
+ SDValue Val =
+ DAG.getNode(ISD::OR, dl, {MVT::i128, MVT::Other}, {ValLo, ValHi});
+ return DAG.getNode(ISD::MERGE_VALUES, dl, {MVT::i128, MVT::Other},
+ {Val, LoadedVal.getValue(2)});
+ }
+ case ISD::ATOMIC_STORE: {
+ // Lower a quadword atomic store to int_ppc_atomic_store_i128, which is then
+ // lowered to PPC instructions by the pattern-matching instruction selector.
+ SDVTList Tys = DAG.getVTList(MVT::Other);
+ SmallVector<SDValue, 4> Ops{
+ N->getOperand(0),
+ DAG.getConstant(Intrinsic::ppc_atomic_store_i128, dl, MVT::i32)};
+ SDValue Val = N->getOperand(2);
+ SDValue ValLo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, Val);
+ SDValue ValHi = DAG.getNode(ISD::SRL, dl, MVT::i128, Val,
+ DAG.getConstant(64, dl, MVT::i32));
+ ValHi = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, ValHi);
+ Ops.push_back(ValLo);
+ Ops.push_back(ValHi);
+ Ops.push_back(N->getOperand(1));
+ return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, Tys, Ops, MemVT,
+ N->getMemOperand());
+ }
+ default:
+ llvm_unreachable("Unexpected atomic opcode");
+ }
+}
+
SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
@@ -10910,6 +10975,8 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
return LowerBSWAP(Op, DAG);
case ISD::ATOMIC_CMP_SWAP:
return LowerATOMIC_CMP_SWAP(Op, DAG);
+ case ISD::ATOMIC_STORE:
+ return LowerATOMIC_LOAD_STORE(Op, DAG);
}
}
@@ -10920,6 +10987,12 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
switch (N->getOpcode()) {
default:
llvm_unreachable("Do not know how to custom type legalize this operation!");
+ case ISD::ATOMIC_LOAD: {
+ SDValue Res = LowerATOMIC_LOAD_STORE(SDValue(N, 0), DAG);
+ Results.push_back(Res);
+ Results.push_back(Res.getValue(1));
+ break;
+ }
case ISD::READCYCLECOUNTER: {
SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));
@@ -12656,6 +12729,24 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
.addDef(Hi)
.addUse(Src, 0, PPC::sub_gp8_x0);
+ } else if (MI.getOpcode() == PPC::LQX_PSEUDO ||
+ MI.getOpcode() == PPC::STQX_PSEUDO) {
+ DebugLoc DL = MI.getDebugLoc();
+ // Ptr is used as the ptr_rc_no_r0 part of LQ/STQ's memory operand and
+ // holds the result of adding RA and RB, so it has to be
+ // g8rc_and_g8rc_nox0.
+ Register Ptr =
+ F->getRegInfo().createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
+ Register Val = MI.getOperand(0).getReg();
+ Register RA = MI.getOperand(1).getReg();
+ Register RB = MI.getOperand(2).getReg();
+ BuildMI(*BB, MI, DL, TII->get(PPC::ADD8), Ptr).addReg(RA).addReg(RB);
+ BuildMI(*BB, MI, DL,
+ MI.getOpcode() == PPC::LQX_PSEUDO ? TII->get(PPC::LQ)
+ : TII->get(PPC::STQ))
+ .addReg(Val, MI.getOpcode() == PPC::LQX_PSEUDO ? RegState::Define : 0)
+ .addImm(0)
+ .addReg(Ptr);
} else {
llvm_unreachable("Unexpected instr type to insert");
}
@@ -16091,6 +16182,22 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
MachineMemOperand::MOVolatile;
return true;
+ case Intrinsic::ppc_atomic_load_i128:
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = MVT::i128;
+ Info.ptrVal = I.getArgOperand(0);
+ Info.offset = 0;
+ Info.align = Align(16);
+ Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
+ return true;
+ case Intrinsic::ppc_atomic_store_i128:
+ Info.opc = ISD::INTRINSIC_VOID;
+ Info.memVT = MVT::i128;
+ Info.ptrVal = I.getArgOperand(2);
+ Info.offset = 0;
+ Info.align = Align(16);
+ Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
+ return true;
case Intrinsic::ppc_altivec_lvx:
case Intrinsic::ppc_altivec_lvxl:
case Intrinsic::ppc_altivec_lvebx:
@@ -17280,7 +17387,8 @@ unsigned PPCTargetLowering::computeMOFlags(const SDNode *Parent, SDValue N,
EVT MemVT = MN->getMemoryVT();
unsigned Size = MemVT.getSizeInBits();
if (MemVT.isScalarInteger()) {
- assert(Size <= 64 && "Not expecting scalar integers larger than 8 bytes!");
+ assert(Size <= 128 &&
+ "Not expecting scalar integers larger than 16 bytes!");
if (Size < 32)
FlagSet |= PPC::MOF_SubWordInt;
else if (Size == 32)
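Concretely, the new LowerATOMIC_LOAD_STORE splits and reassembles the
128-bit value around those intrinsics. Expressed as IR, the lowering
computes roughly the following (a sketch of what the DAG nodes above do;
%ptr and %val are placeholder names):

    ; atomic load: combine the two returned doublewords into an i128.
    %pair = call { i64, i64 } @llvm.ppc.atomic.load.i128(i8* %ptr)
    %lo = extractvalue { i64, i64 } %pair, 0
    %hi = extractvalue { i64, i64 } %pair, 1
    %lo.z = zext i64 %lo to i128
    %hi.z = zext i64 %hi to i128
    %hi.sh = shl i128 %hi.z, 64
    %loaded = or i128 %lo.z, %hi.sh

    ; atomic store: split the i128 into two doublewords.
    %s.lo = trunc i128 %val to i64
    %s.hi.sh = lshr i128 %val, 64
    %s.hi = trunc i128 %s.hi.sh to i64
    call void @llvm.ppc.atomic.store.i128(i64 %s.lo, i64 %s.hi, i8* %ptr)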
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 87579bad118f6..26df67ddcf44e 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1246,6 +1246,7 @@ namespace llvm {
SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBSWAP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerATOMIC_LOAD_STORE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index 7a1b3f5945153..01323abfd475d 100644
--- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -1342,12 +1342,25 @@ def LQ : DQForm_RTp5_RA17_MEM<56, 0,
[]>,
RegConstraint<"@earlyclobber $RTp">,
isPPC64;
+// The ISA has no real LQX instruction; define a pseudo so that the X-Form
+// can be handled during isel. Keeping it around pre-RA may expose
+// opportunities for optimizations (CSE, LICM, etc.) on the result of
+// adding RA and RB.
+def LQX_PSEUDO : PPCCustomInserterPseudo<(outs g8prc:$RTp),
+ (ins memrr:$src), "#LQX_PSEUDO", []>;
+
def RESTORE_QUADWORD : PPCEmitTimePseudo<(outs g8prc:$RTp), (ins memrix:$src),
"#RESTORE_QUADWORD", []>;
}
}
+def : Pat<(int_ppc_atomic_load_i128 iaddrX16:$src),
+ (SPLIT_QUADWORD (LQ memrix16:$src))>;
+
+def : Pat<(int_ppc_atomic_load_i128 ForceXForm:$src),
+ (SPLIT_QUADWORD (LQX_PSEUDO memrr:$src))>;
+
// Support for medium and large code model.
let hasSideEffects = 0 in {
let isReMaterializable = 1 in {
@@ -1536,12 +1549,28 @@ let mayStore = 1, hasNoSchedulingInfo = 1 in {
def STQ : DSForm_1<62, 2, (outs), (ins g8prc:$RSp, memrix:$dst),
"stq $RSp, $dst", IIC_LdStSTQ,
[]>, isPPC64;
+
+def STQX_PSEUDO : PPCCustomInserterPseudo<(outs),
+ (ins g8prc:$RSp, memrr:$dst),
+ "#STQX_PSEUDO", []>;
+
def SPILL_QUADWORD : PPCEmitTimePseudo<(outs), (ins g8prc:$RSp, memrix:$dst),
"#SPILL_QUADWORD", []>;
}
}
+def BUILD_QUADWORD : PPCPostRAExpPseudo<
+ (outs g8prc:$RTp),
+ (ins g8rc:$lo, g8rc:$hi),
+ "#BUILD_QUADWORD", []>;
+
+def : Pat<(int_ppc_atomic_store_i128 i64:$lo, i64:$hi, DSForm:$dst),
+ (STQ (BUILD_QUADWORD g8rc:$lo, g8rc:$hi), memrix:$dst)>;
+
+def : Pat<(int_ppc_atomic_store_i128 i64:$lo, i64:$hi, ForceXForm:$dst),
+ (STQX_PSEUDO (BUILD_QUADWORD g8rc:$lo, g8rc:$hi), memrr:$dst)>;
+
// Stores with Update (pre-inc).
let PPC970_Unit = 2, mayStore = 1, mayLoad = 0 in {
let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
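The pairs of patterns above pick between the two addressing forms. A short
IR sketch of which pattern fires, with shapes taken from the new test file:

    ; A plain, 16-byte-aligned address matches iaddrX16 and selects LQ
    ; directly (DS-Form displacement).
    %a = load atomic i128, i128* %base unordered, align 16

    ; A register-indexed address goes through ForceXForm and selects
    ; LQX_PSEUDO, which the custom inserter expands to add + lq.
    %p = getelementptr i128, i128* %base, i64 %idx
    %b = load atomic i128, i128* %p unordered, align 16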
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 32d95254dce6a..c74b6bcf42873 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -3104,6 +3104,7 @@ bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
return true;
}
+ // FIXME: Maybe we can expand this in the 'PowerPC Expand Atomic' pass.
case PPC::CFENCE8: {
auto Val = MI.getOperand(0).getReg();
BuildMI(MBB, MI, DL, get(PPC::CMPD), PPC::CR7).addReg(Val).addReg(Val);
diff --git a/llvm/test/CodeGen/PowerPC/atomics-i128-ldst.ll b/llvm/test/CodeGen/PowerPC/atomics-i128-ldst.ll
new file mode 100644
index 0000000000000..e60407d664b0e
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/atomics-i128-ldst.ll
@@ -0,0 +1,169 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-unknown \
+; RUN: -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-track-subreg-liveness \
+; RUN: -ppc-quadword-atomics < %s | FileCheck --check-prefix=P8 %s
+
+define dso_local i128 @lq_unordered(i128* %src) {
+; P8-LABEL: lq_unordered:
+; P8: # %bb.0: # %entry
+; P8-NEXT: lq r4, 0(r3)
+; P8-NEXT: mr r3, r4
+; P8-NEXT: mr r4, r5
+; P8-NEXT: blr
+entry:
+ %0 = load atomic i128, i128* %src unordered, align 16
+ ret i128 %0
+}
+
+define dso_local i128 @lqx_unordered(i128* %src, i64 %idx) {
+; P8-LABEL: lqx_unordered:
+; P8: # %bb.0: # %entry
+; P8-NEXT: sldi r4, r4, 4
+; P8-NEXT: add r3, r3, r4
+; P8-NEXT: lq r4, 0(r3)
+; P8-NEXT: mr r3, r4
+; P8-NEXT: mr r4, r5
+; P8-NEXT: blr
+entry:
+ %0 = getelementptr i128, i128* %src, i64 %idx
+ %1 = load atomic i128, i128* %0 unordered, align 16
+ ret i128 %1
+}
+
+define dso_local i128 @lq_big_offset_unordered(i128* %src) {
+; P8-LABEL: lq_big_offset_unordered:
+; P8: # %bb.0: # %entry
+; P8-NEXT: lis r4, 32
+; P8-NEXT: add r3, r3, r4
+; P8-NEXT: lq r4, 0(r3)
+; P8-NEXT: mr r3, r4
+; P8-NEXT: mr r4, r5
+; P8-NEXT: blr
+entry:
+ %0 = getelementptr i128, i128* %src, i64 131072
+ %1 = load atomic i128, i128* %0 unordered, align 16
+ ret i128 %1
+}
+
+define dso_local i128 @lq_monotonic(i128* %src) {
+; P8-LABEL: lq_monotonic:
+; P8: # %bb.0: # %entry
+; P8-NEXT: lq r4, 0(r3)
+; P8-NEXT: mr r3, r4
+; P8-NEXT: mr r4, r5
+; P8-NEXT: blr
+entry:
+ %0 = load atomic i128, i128* %src monotonic, align 16
+ ret i128 %0
+}
+
+define dso_local i128 @lq_acquire(i128* %src) {
+; P8-LABEL: lq_acquire:
+; P8: # %bb.0: # %entry
+; P8-NEXT: lq r4, 0(r3)
+; P8-NEXT: cmpd cr7, r5, r5
+; P8-NEXT: mr r3, r4
+; P8-NEXT: mr r4, r5
+; P8-NEXT: bne- cr7, .+4
+; P8-NEXT: isync
+; P8-NEXT: blr
+entry:
+ %0 = load atomic i128, i128* %src acquire, align 16
+ ret i128 %0
+}
+
+define dso_local i128 @lq_seqcst(i128* %src) {
+; P8-LABEL: lq_seqcst:
+; P8: # %bb.0: # %entry
+; P8-NEXT: sync
+; P8-NEXT: lq r4, 0(r3)
+; P8-NEXT: cmpd cr7, r5, r5
+; P8-NEXT: mr r3, r4
+; P8-NEXT: mr r4, r5
+; P8-NEXT: bne- cr7, .+4
+; P8-NEXT: isync
+; P8-NEXT: blr
+entry:
+ %0 = load atomic i128, i128* %src seq_cst, align 16
+ ret i128 %0
+}
+
+define dso_local void @stq_unordered(i128 %val, i128* %dst) {
+; P8-LABEL: stq_unordered:
+; P8: # %bb.0: # %entry
+; P8-NEXT: mr r7, r4
+; P8-NEXT: mr r6, r3
+; P8-NEXT: stq r6, 0(r5)
+; P8-NEXT: blr
+entry:
+ store atomic i128 %val, i128* %dst unordered, align 16
+ ret void
+}
+
+define dso_local void @stqx_unordered(i128 %val, i128* %dst, i64 %idx) {
+; P8-LABEL: stqx_unordered:
+; P8: # %bb.0: # %entry
+; P8-NEXT: sldi r6, r6, 4
+; P8-NEXT: mr r9, r4
+; P8-NEXT: mr r8, r3
+; P8-NEXT: add r3, r5, r6
+; P8-NEXT: stq r8, 0(r3)
+; P8-NEXT: blr
+entry:
+ %0 = getelementptr i128, i128* %dst, i64 %idx
+ store atomic i128 %val, i128* %0 unordered, align 16
+ ret void
+}
+
+define dso_local void @stq_big_offset_unordered(i128 %val, i128* %dst) {
+; P8-LABEL: stq_big_offset_unordered:
+; P8: # %bb.0: # %entry
+; P8-NEXT: lis r6, 32
+; P8-NEXT: mr r9, r4
+; P8-NEXT: mr r8, r3
+; P8-NEXT: add r3, r5, r6
+; P8-NEXT: stq r8, 0(r3)
+; P8-NEXT: blr
+entry:
+ %0 = getelementptr i128, i128* %dst, i64 131072
+ store atomic i128 %val, i128* %0 unordered, align 16
+ ret void
+}
+
+define dso_local void @stq_monotonic(i128 %val, i128* %dst) {
+; P8-LABEL: stq_monotonic:
+; P8: # %bb.0: # %entry
+; P8-NEXT: mr r7, r4
+; P8-NEXT: mr r6, r3
+; P8-NEXT: stq r6, 0(r5)
+; P8-NEXT: blr
+entry:
+ store atomic i128 %val, i128* %dst monotonic, align 16
+ ret void
+}
+
+define dso_local void @stq_release(i128 %val, i128* %dst) {
+; P8-LABEL: stq_release:
+; P8: # %bb.0: # %entry
+; P8-NEXT: lwsync
+; P8-NEXT: mr r7, r4
+; P8-NEXT: mr r6, r3
+; P8-NEXT: stq r6, 0(r5)
+; P8-NEXT: blr
+entry:
+ store atomic i128 %val, i128* %dst release, align 16
+ ret void
+}
+
+define dso_local void @stq_seqcst(i128 %val, i128* %dst) {
+; P8-LABEL: stq_seqcst:
+; P8: # %bb.0: # %entry
+; P8-NEXT: sync
+; P8-NEXT: mr r7, r4
+; P8-NEXT: mr r6, r3
+; P8-NEXT: stq r6, 0(r5)
+; P8-NEXT: blr
+entry:
+ store atomic i128 %val, i128* %dst seq_cst, align 16
+ ret void
+}