[llvm] [PowerPC] Use 'sync; ld; cmp; bc; isync' for load seq-cst on 32-bit platform (PR #75905)

Kai Luo via llvm-commits llvm-commits at lists.llvm.org
Tue Dec 19 00:12:27 PST 2023


https://github.com/bzEq created https://github.com/llvm/llvm-project/pull/75905

In theory, the `cmp; bc; isync` sequence is more performant than `lwsync` as the trailing fence of an atomic load.

>From 65369e2ab09cc5c8997c26deff9f89d4c60d63f2 Mon Sep 17 00:00:00 2001
From: Kai Luo <lkail at cn.ibm.com>
Date: Tue, 19 Dec 2023 08:10:31 +0000
Subject: [PATCH] Use 'sync; ld; cmp; bc; isync' for load seq-cst on 32-bit platform
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp  | 9 +++++----
 llvm/lib/Target/PowerPC/PPCInstrInfo.cpp     | 4 +++-
 llvm/lib/Target/PowerPC/PPCInstrInfo.td      | 4 ++++
 llvm/test/CodeGen/PowerPC/atomics-indexed.ll | 8 ++++++--
 llvm/test/CodeGen/PowerPC/atomics.ll         | 4 +++-
 5 files changed, 21 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index acaf98b62fd94d..ae0d3b76f89a36 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -10984,7 +10984,6 @@ SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
   switch (cast<ConstantSDNode>(Op.getOperand(ArgStart))->getZExtValue()) {
   case Intrinsic::ppc_cfence: {
     assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
-    assert(Subtarget.isPPC64() && "Only 64-bit is supported for now.");
     SDValue Val = Op.getOperand(ArgStart + 1);
     EVT Ty = Val.getValueType();
     if (Ty == MVT::i128) {
@@ -10992,9 +10991,11 @@ SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
       // ordering?
       Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, Val);
     }
+    unsigned Opcode = Subtarget.isPPC64() ? PPC::CFENCE8 : PPC::CFENCE;
+    EVT FTy = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
     return SDValue(
-        DAG.getMachineNode(PPC::CFENCE8, DL, MVT::Other,
-                           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Val),
+        DAG.getMachineNode(Opcode, DL, MVT::Other,
+                           DAG.getNode(ISD::ANY_EXTEND, DL, FTy, Val),
                            Op.getOperand(0)),
         0);
   }
@@ -11825,7 +11826,7 @@ Instruction *PPCTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
     // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
     // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
     // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
-    if (isa<LoadInst>(Inst) && Subtarget.isPPC64())
+    if (isa<LoadInst>(Inst))
       return Builder.CreateCall(
           Intrinsic::getDeclaration(
               Builder.GetInsertBlock()->getParent()->getParent(),
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 4dfd0358a16fd4..aaced58defe603 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -3177,9 +3177,11 @@ bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
   }
 
     // FIXME: Maybe we can expand it in 'PowerPC Expand Atomic' pass.
+  case PPC::CFENCE:
   case PPC::CFENCE8: {
     auto Val = MI.getOperand(0).getReg();
-    BuildMI(MBB, MI, DL, get(PPC::CMPD), PPC::CR7).addReg(Val).addReg(Val);
+    unsigned CmpOp = Subtarget.isPPC64() ? PPC::CMPD : PPC::CMPW;
+    BuildMI(MBB, MI, DL, get(CmpOp), PPC::CR7).addReg(Val).addReg(Val);
     BuildMI(MBB, MI, DL, get(PPC::CTRL_DEP))
         .addImm(PPC::PRED_NE_MINUS)
         .addReg(PPC::CR7)
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index 6199785206b2f7..410358d5aaf529 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -5260,6 +5260,9 @@ def HASHCHKP : XForm_XD6_RA5_RB5<31, 690, (outs),
                                  "hashchkp $RB, $addr", IIC_IntGeneral, []>;
 }
 
+let Defs = [CR7], Itinerary = IIC_LdStSync in
+def CFENCE : PPCPostRAExpPseudo<(outs), (ins gprc:$cr), "#CFENCE", []>;
+
 // Now both high word and low word are reversed, next
 // swap the high word and low word.
 def : Pat<(i64 (bitreverse i64:$A)),
@@ -5313,3 +5316,4 @@ def : Pat<(int_ppc_dcbtt ForceXForm:$dst),
 
 def : Pat<(int_ppc_stfiw ForceXForm:$dst, f64:$XT),
           (STFIWX f64:$XT, ForceXForm:$dst)>;
+
diff --git a/llvm/test/CodeGen/PowerPC/atomics-indexed.ll b/llvm/test/CodeGen/PowerPC/atomics-indexed.ll
index 3ca804d820683f..986c0de2fb4923 100644
--- a/llvm/test/CodeGen/PowerPC/atomics-indexed.ll
+++ b/llvm/test/CodeGen/PowerPC/atomics-indexed.ll
@@ -15,7 +15,9 @@ define i8 @load_x_i8_seq_cst(ptr %mem) {
 ; PPC32-NEXT:    sync
 ; PPC32-NEXT:    ori r4, r4, 24464
 ; PPC32-NEXT:    lbzx r3, r3, r4
-; PPC32-NEXT:    lwsync
+; PPC32-NEXT:    cmpw cr7, r3, r3
+; PPC32-NEXT:    bne- cr7, .+4
+; PPC32-NEXT:    isync
 ; PPC32-NEXT:    blr
 ;
 ; PPC64-LABEL: load_x_i8_seq_cst:
@@ -38,7 +40,9 @@ define i16 @load_x_i16_acquire(ptr %mem) {
 ; PPC32-NEXT:    lis r4, 2
 ; PPC32-NEXT:    ori r4, r4, 48928
 ; PPC32-NEXT:    lhzx r3, r3, r4
-; PPC32-NEXT:    lwsync
+; PPC32-NEXT:    cmpw cr7, r3, r3
+; PPC32-NEXT:    bne- cr7, .+4
+; PPC32-NEXT:    isync
 ; PPC32-NEXT:    blr
 ;
 ; PPC64-LABEL: load_x_i16_acquire:
diff --git a/llvm/test/CodeGen/PowerPC/atomics.ll b/llvm/test/CodeGen/PowerPC/atomics.ll
index 23ff5f69269161..04cdbe9d7e7859 100644
--- a/llvm/test/CodeGen/PowerPC/atomics.ll
+++ b/llvm/test/CodeGen/PowerPC/atomics.ll
@@ -29,7 +29,9 @@ define i32 @load_i32_acquire(ptr %mem) {
 ; PPC32-LABEL: load_i32_acquire:
 ; PPC32:       # %bb.0:
 ; PPC32-NEXT:    lwz r3, 0(r3)
-; PPC32-NEXT:    lwsync
+; PPC32-NEXT:    cmpw cr7, r3, r3
+; PPC32-NEXT:    bne- cr7, .+4
+; PPC32-NEXT:    isync
 ; PPC32-NEXT:    blr
 ;
 ; PPC64-LABEL: load_i32_acquire:



More information about the llvm-commits mailing list