[llvm] 5641422 - [PowerPC] Use 'sync; ld; cmp; bc; isync' for atomic load seq-cst on 32-bit platform (#75905)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 19 18:01:06 PST 2023
Author: Kai Luo
Date: 2023-12-20T10:01:02+08:00
New Revision: 56414220dfeb274a15beb55ab3da757978a0255f
URL: https://github.com/llvm/llvm-project/commit/56414220dfeb274a15beb55ab3da757978a0255f
DIFF: https://github.com/llvm/llvm-project/commit/56414220dfeb274a15beb55ab3da757978a0255f.diff
LOG: [PowerPC] Use 'sync; ld; cmp; bc; isync' for atomic load seq-cst on 32-bit platform (#75905)
In theory, `cmp; bc; isync` is cheaper than `lwsync` as the trailing
fence after an atomic load. The 64-bit platform already uses this
sequence; this change implements it for the 32-bit platform as well.
Added:
Modified:
llvm/lib/Target/PowerPC/PPCISelLowering.cpp
llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
llvm/lib/Target/PowerPC/PPCInstrInfo.td
llvm/test/CodeGen/PowerPC/atomics-indexed.ll
llvm/test/CodeGen/PowerPC/atomics.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index acaf98b62fd94d..ae0d3b76f89a36 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -10984,7 +10984,6 @@ SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
switch (cast<ConstantSDNode>(Op.getOperand(ArgStart))->getZExtValue()) {
case Intrinsic::ppc_cfence: {
assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
- assert(Subtarget.isPPC64() && "Only 64-bit is supported for now.");
SDValue Val = Op.getOperand(ArgStart + 1);
EVT Ty = Val.getValueType();
if (Ty == MVT::i128) {
@@ -10992,9 +10991,11 @@ SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
// ordering?
Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, Val);
}
+ unsigned Opcode = Subtarget.isPPC64() ? PPC::CFENCE8 : PPC::CFENCE;
+ EVT FTy = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
return SDValue(
- DAG.getMachineNode(PPC::CFENCE8, DL, MVT::Other,
- DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Val),
+ DAG.getMachineNode(Opcode, DL, MVT::Other,
+ DAG.getNode(ISD::ANY_EXTEND, DL, FTy, Val),
Op.getOperand(0)),
0);
}
@@ -11825,7 +11826,7 @@ Instruction *PPCTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
// See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
// http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
// and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
- if (isa<LoadInst>(Inst) && Subtarget.isPPC64())
+ if (isa<LoadInst>(Inst))
return Builder.CreateCall(
Intrinsic::getDeclaration(
Builder.GetInsertBlock()->getParent()->getParent(),
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 4dfd0358a16fd4..aaced58defe603 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -3177,9 +3177,11 @@ bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
}
// FIXME: Maybe we can expand it in 'PowerPC Expand Atomic' pass.
+ case PPC::CFENCE:
case PPC::CFENCE8: {
auto Val = MI.getOperand(0).getReg();
- BuildMI(MBB, MI, DL, get(PPC::CMPD), PPC::CR7).addReg(Val).addReg(Val);
+ unsigned CmpOp = Subtarget.isPPC64() ? PPC::CMPD : PPC::CMPW;
+ BuildMI(MBB, MI, DL, get(CmpOp), PPC::CR7).addReg(Val).addReg(Val);
BuildMI(MBB, MI, DL, get(PPC::CTRL_DEP))
.addImm(PPC::PRED_NE_MINUS)
.addReg(PPC::CR7)
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index 6199785206b2f7..b1601739fd4569 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -5260,6 +5260,9 @@ def HASHCHKP : XForm_XD6_RA5_RB5<31, 690, (outs),
"hashchkp $RB, $addr", IIC_IntGeneral, []>;
}
+let Defs = [CR7], Itinerary = IIC_LdStSync in
+def CFENCE : PPCPostRAExpPseudo<(outs), (ins gprc:$cr), "#CFENCE", []>;
+
// Now both high word and low word are reversed, next
// swap the high word and low word.
def : Pat<(i64 (bitreverse i64:$A)),
diff --git a/llvm/test/CodeGen/PowerPC/atomics-indexed.ll b/llvm/test/CodeGen/PowerPC/atomics-indexed.ll
index 3ca804d820683f..986c0de2fb4923 100644
--- a/llvm/test/CodeGen/PowerPC/atomics-indexed.ll
+++ b/llvm/test/CodeGen/PowerPC/atomics-indexed.ll
@@ -15,7 +15,9 @@ define i8 @load_x_i8_seq_cst(ptr %mem) {
; PPC32-NEXT: sync
; PPC32-NEXT: ori r4, r4, 24464
; PPC32-NEXT: lbzx r3, r3, r4
-; PPC32-NEXT: lwsync
+; PPC32-NEXT: cmpw cr7, r3, r3
+; PPC32-NEXT: bne- cr7, .+4
+; PPC32-NEXT: isync
; PPC32-NEXT: blr
;
; PPC64-LABEL: load_x_i8_seq_cst:
@@ -38,7 +40,9 @@ define i16 @load_x_i16_acquire(ptr %mem) {
; PPC32-NEXT: lis r4, 2
; PPC32-NEXT: ori r4, r4, 48928
; PPC32-NEXT: lhzx r3, r3, r4
-; PPC32-NEXT: lwsync
+; PPC32-NEXT: cmpw cr7, r3, r3
+; PPC32-NEXT: bne- cr7, .+4
+; PPC32-NEXT: isync
; PPC32-NEXT: blr
;
; PPC64-LABEL: load_x_i16_acquire:
diff --git a/llvm/test/CodeGen/PowerPC/atomics.ll b/llvm/test/CodeGen/PowerPC/atomics.ll
index 23ff5f69269161..04cdbe9d7e7859 100644
--- a/llvm/test/CodeGen/PowerPC/atomics.ll
+++ b/llvm/test/CodeGen/PowerPC/atomics.ll
@@ -29,7 +29,9 @@ define i32 @load_i32_acquire(ptr %mem) {
; PPC32-LABEL: load_i32_acquire:
; PPC32: # %bb.0:
; PPC32-NEXT: lwz r3, 0(r3)
-; PPC32-NEXT: lwsync
+; PPC32-NEXT: cmpw cr7, r3, r3
+; PPC32-NEXT: bne- cr7, .+4
+; PPC32-NEXT: isync
; PPC32-NEXT: blr
;
; PPC64-LABEL: load_i32_acquire:
More information about the llvm-commits mailing list