[llvm] r303205 - [PPC] Lower load acquire/seq_cst trailing fence to cmp + bne + isync.
Tim Shen via llvm-commits
llvm-commits at lists.llvm.org
Tue May 16 13:18:07 PDT 2017
Author: timshen
Date: Tue May 16 15:18:06 2017
New Revision: 303205
URL: http://llvm.org/viewvc/llvm-project?rev=303205&view=rev
Log:
[PPC] Lower load acquire/seq_cst trailing fence to cmp + bne + isync.
Summary:
This fixes pr32392.
The lowering pipeline is:
llvm.ppc.cfence in IR -> PPC::CFENCE8 in isel -> Actual instructions in
expandPostRAPseudo.
The reason why expandPostRAPseudo is chosen is because previous passes
are likely eliminating instructions like cmpw 3, 3 (early CSE) and bne-
7, .+4 (some branch pass(s)).
Differential Revision: https://reviews.llvm.org/D32763
Modified:
llvm/trunk/include/llvm/IR/IntrinsicsPowerPC.td
llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h
llvm/trunk/lib/Target/PowerPC/PPCInstr64Bit.td
llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.cpp
llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td
llvm/trunk/test/CodeGen/PowerPC/atomic-2.ll
llvm/trunk/test/CodeGen/PowerPC/atomics-indexed.ll
llvm/trunk/test/CodeGen/PowerPC/atomics-regression.ll
llvm/trunk/test/CodeGen/PowerPC/atomics.ll
Modified: llvm/trunk/include/llvm/IR/IntrinsicsPowerPC.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/IntrinsicsPowerPC.td?rev=303205&r1=303204&r2=303205&view=diff
==============================================================================
--- llvm/trunk/include/llvm/IR/IntrinsicsPowerPC.td (original)
+++ llvm/trunk/include/llvm/IR/IntrinsicsPowerPC.td Tue May 16 15:18:06 2017
@@ -1132,4 +1132,6 @@ def int_ppc_tsuspend : GCCBuiltin<"__bui
def int_ppc_ttest : GCCBuiltin<"__builtin_ttest">,
Intrinsic<[llvm_i64_ty], [], []>;
+
+def int_ppc_cfence : Intrinsic<[], [llvm_anyint_ty], []>;
}
Modified: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp?rev=303205&r1=303204&r2=303205&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp Tue May 16 15:18:06 2017
@@ -410,6 +410,11 @@ PPCTargetLowering::PPCTargetLowering(con
// To handle counter-based loop conditions.
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);
+ setOperationAction(ISD::INTRINSIC_VOID, MVT::i8, Custom);
+ setOperationAction(ISD::INTRINSIC_VOID, MVT::i16, Custom);
+ setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom);
+ setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
+
// Comparisons that require checking two conditions.
setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
@@ -8184,6 +8189,26 @@ SDValue PPCTargetLowering::LowerINTRINSI
return Flags;
}
+SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
+ SelectionDAG &DAG) const {
+ // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to
+ // the beginning of the argument list.
+ int ArgStart = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1;
+ SDLoc DL(Op);
+ switch (cast<ConstantSDNode>(Op.getOperand(ArgStart))->getZExtValue()) {
+ case Intrinsic::ppc_cfence: {
+ assert(Subtarget.isPPC64() && "Only 64-bit is supported for now.");
+ return SDValue(DAG.getMachineNode(PPC::CFENCE8, DL, MVT::Other,
+ DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
+ Op.getOperand(ArgStart + 1))),
+ 0);
+ }
+ default:
+ break;
+ }
+ return SDValue();
+}
+
SDValue PPCTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
@@ -8649,6 +8674,9 @@ SDValue PPCTargetLowering::LowerOperatio
// Frame & Return address.
case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
+
+ case ISD::INTRINSIC_VOID:
+ return LowerINTRINSIC_VOID(Op, DAG);
}
}
@@ -8753,12 +8781,19 @@ Instruction *PPCTargetLowering::emitLead
Instruction *PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
Instruction *Inst,
AtomicOrdering Ord) const {
- if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord))
+ if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) {
+ // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
+ // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
+ // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
+ if (isa<LoadInst>(Inst) && Subtarget.isPPC64())
+ return Builder.CreateCall(
+ Intrinsic::getDeclaration(
+ Builder.GetInsertBlock()->getParent()->getParent(),
+ Intrinsic::ppc_cfence, {Inst->getType()}),
+ {Inst});
+ // FIXME: Can use isync for rmw operation.
return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
- // FIXME: this is too conservative, a dependent branch + isync is enough.
- // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
- // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
- // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
+ }
return nullptr;
}
Modified: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h?rev=303205&r1=303204&r2=303205&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h Tue May 16 15:18:06 2017
@@ -905,6 +905,7 @@ namespace llvm {
SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
Modified: llvm/trunk/lib/Target/PowerPC/PPCInstr64Bit.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCInstr64Bit.td?rev=303205&r1=303204&r2=303205&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCInstr64Bit.td (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCInstr64Bit.td Tue May 16 15:18:06 2017
@@ -983,6 +983,10 @@ def LDgotTprelL: Pseudo<(outs g8rc:$rD),
[(set i64:$rD,
(PPCldGotTprelL tglobaltlsaddr:$disp, i64:$reg))]>,
isPPC64;
+
+let isBarrier = 1, isPseudo = 1, Defs = [CR7], Itinerary = IIC_LdStSync in
+def CFENCE8 : Pseudo<(outs), (ins g8rc:$cr), "#CFENCE8", []>;
+
def : Pat<(PPCaddTls i64:$in, tglobaltlsaddr:$g),
(ADD8TLS $in, tglobaltlsaddr:$g)>;
def ADDIStlsgdHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
Modified: llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.cpp?rev=303205&r1=303204&r2=303205&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.cpp Tue May 16 15:18:06 2017
@@ -1873,6 +1873,8 @@ PPCInstrInfo::getSerializableBitmaskMach
}
bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
+ auto &MBB = *MI.getParent();
+ auto DL = MI.getDebugLoc();
switch (MI.getOpcode()) {
case TargetOpcode::LOAD_STACK_GUARD: {
assert(Subtarget.isTargetLinux() &&
@@ -1920,6 +1922,17 @@ bool PPCInstrInfo::expandPostRAPseudo(Ma
MI.setDesc(get(Opcode));
return true;
}
+ case PPC::CFENCE8: {
+ auto Val = MI.getOperand(0).getReg();
+ BuildMI(MBB, MI, DL, get(PPC::CMPW), PPC::CR7).addReg(Val).addReg(Val);
+ BuildMI(MBB, MI, DL, get(PPC::CTRL_DEP))
+ .addImm(PPC::PRED_NE_MINUS)
+ .addReg(PPC::CR7)
+ .addImm(1);
+ MI.setDesc(get(PPC::ISYNC));
+ MI.RemoveOperand(0);
+ return true;
+ }
}
return false;
}
Modified: llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td?rev=303205&r1=303204&r2=303205&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCInstrInfo.td Tue May 16 15:18:06 2017
@@ -1223,9 +1223,15 @@ let isBranch = 1, isTerminator = 1, hasC
// FIXME: should be able to write a pattern for PPCcondbranch, but can't use
// a two-value operand where a dag node expects two operands. :(
let isCodeGenOnly = 1 in {
- def BCC : BForm<16, 0, 0, (outs), (ins pred:$cond, condbrtarget:$dst),
- "b${cond:cc}${cond:pm} ${cond:reg}, $dst"
- /*[(PPCcondbranch crrc:$crS, imm:$opc, bb:$dst)]*/>;
+ class BCC_class : BForm<16, 0, 0, (outs), (ins pred:$cond, condbrtarget:$dst),
+ "b${cond:cc}${cond:pm} ${cond:reg}, $dst"
+ /*[(PPCcondbranch crrc:$crS, imm:$opc, bb:$dst)]*/>;
+ def BCC : BCC_class;
+
+ // The same as BCC, except that it's not a terminator. Used for introducing
+ // control flow dependency without creating new blocks.
+ let isTerminator = 0 in def CTRL_DEP : BCC_class;
+
def BCCA : BForm<16, 1, 0, (outs), (ins pred:$cond, abscondbrtarget:$dst),
"b${cond:cc}a${cond:pm} ${cond:reg}, $dst">;
Modified: llvm/trunk/test/CodeGen/PowerPC/atomic-2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/atomic-2.ll?rev=303205&r1=303204&r2=303205&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/atomic-2.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/atomic-2.ll Tue May 16 15:18:06 2017
@@ -108,8 +108,10 @@ entry:
; CHECK: @atomic_load
%tmp = load atomic i64, i64* %mem acquire, align 64
; CHECK-NOT: ldarx
-; CHECK: ld
-; CHECK: lwsync
+; CHECK: ld [[VAL:[0-9]+]]
+; CHECK: cmpw [[CR:[0-9]+]], [[VAL]], [[VAL]]
+; CHECK: bne- [[CR]], .+4
+; CHECK: isync
ret i64 %tmp
}
Modified: llvm/trunk/test/CodeGen/PowerPC/atomics-indexed.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/atomics-indexed.ll?rev=303205&r1=303204&r2=303205&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/atomics-indexed.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/atomics-indexed.ll Tue May 16 15:18:06 2017
@@ -10,16 +10,22 @@
define i8 @load_x_i8_seq_cst([100000 x i8]* %mem) {
; CHECK-LABEL: load_x_i8_seq_cst
; CHECK: sync
-; CHECK: lbzx
-; CHECK: lwsync
+; CHECK: lbzx [[VAL:r[0-9]+]]
+; CHECK-PPC32: lwsync
+; CHECK-PPC64: cmpw [[CR:cr[0-9]+]], [[VAL]], [[VAL]]
+; CHECK-PPC64: bne- [[CR]], .+4
+; CHECK-PPC64: isync
%ptr = getelementptr inbounds [100000 x i8], [100000 x i8]* %mem, i64 0, i64 90000
%val = load atomic i8, i8* %ptr seq_cst, align 1
ret i8 %val
}
define i16 @load_x_i16_acquire([100000 x i16]* %mem) {
; CHECK-LABEL: load_x_i16_acquire
-; CHECK: lhzx
-; CHECK: lwsync
+; CHECK: lhzx [[VAL:r[0-9]+]]
+; CHECK-PPC32: lwsync
+; CHECK-PPC64: cmpw [[CR:cr[0-9]+]], [[VAL]], [[VAL]]
+; CHECK-PPC64: bne- [[CR]], .+4
+; CHECK-PPC64: isync
%ptr = getelementptr inbounds [100000 x i16], [100000 x i16]* %mem, i64 0, i64 90000
%val = load atomic i16, i16* %ptr acquire, align 2
ret i16 %val
Modified: llvm/trunk/test/CodeGen/PowerPC/atomics-regression.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/atomics-regression.ll?rev=303205&r1=303204&r2=303205&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/atomics-regression.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/atomics-regression.ll Tue May 16 15:18:06 2017
@@ -23,7 +23,9 @@ define i8 @test2(i8* %ptr) {
; PPC64LE-LABEL: test2:
; PPC64LE: # BB#0:
; PPC64LE-NEXT: lbz 3, 0(3)
-; PPC64LE-NEXT: lwsync
+; PPC64LE-NEXT: cmpw 7, 3, 3
+; PPC64LE-NEXT: bne- 7, .+4
+; PPC64LE-NEXT: isync
; PPC64LE-NEXT: blr
%val = load atomic i8, i8* %ptr acquire, align 1
ret i8 %val
@@ -35,7 +37,9 @@ define i8 @test3(i8* %ptr) {
; PPC64LE-NEXT: sync
; PPC64LE-NEXT: ori 2, 2, 0
; PPC64LE-NEXT: lbz 3, 0(3)
-; PPC64LE-NEXT: lwsync
+; PPC64LE-NEXT: cmpw 7, 3, 3
+; PPC64LE-NEXT: bne- 7, .+4
+; PPC64LE-NEXT: isync
; PPC64LE-NEXT: blr
%val = load atomic i8, i8* %ptr seq_cst, align 1
ret i8 %val
@@ -63,7 +67,9 @@ define i16 @test6(i16* %ptr) {
; PPC64LE-LABEL: test6:
; PPC64LE: # BB#0:
; PPC64LE-NEXT: lhz 3, 0(3)
-; PPC64LE-NEXT: lwsync
+; PPC64LE-NEXT: cmpw 7, 3, 3
+; PPC64LE-NEXT: bne- 7, .+4
+; PPC64LE-NEXT: isync
; PPC64LE-NEXT: blr
%val = load atomic i16, i16* %ptr acquire, align 2
ret i16 %val
@@ -75,7 +81,9 @@ define i16 @test7(i16* %ptr) {
; PPC64LE-NEXT: sync
; PPC64LE-NEXT: ori 2, 2, 0
; PPC64LE-NEXT: lhz 3, 0(3)
-; PPC64LE-NEXT: lwsync
+; PPC64LE-NEXT: cmpw 7, 3, 3
+; PPC64LE-NEXT: bne- 7, .+4
+; PPC64LE-NEXT: isync
; PPC64LE-NEXT: blr
%val = load atomic i16, i16* %ptr seq_cst, align 2
ret i16 %val
@@ -103,7 +111,9 @@ define i32 @test10(i32* %ptr) {
; PPC64LE-LABEL: test10:
; PPC64LE: # BB#0:
; PPC64LE-NEXT: lwz 3, 0(3)
-; PPC64LE-NEXT: lwsync
+; PPC64LE-NEXT: cmpw 7, 3, 3
+; PPC64LE-NEXT: bne- 7, .+4
+; PPC64LE-NEXT: isync
; PPC64LE-NEXT: blr
%val = load atomic i32, i32* %ptr acquire, align 4
ret i32 %val
@@ -115,7 +125,9 @@ define i32 @test11(i32* %ptr) {
; PPC64LE-NEXT: sync
; PPC64LE-NEXT: ori 2, 2, 0
; PPC64LE-NEXT: lwz 3, 0(3)
-; PPC64LE-NEXT: lwsync
+; PPC64LE-NEXT: cmpw 7, 3, 3
+; PPC64LE-NEXT: bne- 7, .+4
+; PPC64LE-NEXT: isync
; PPC64LE-NEXT: blr
%val = load atomic i32, i32* %ptr seq_cst, align 4
ret i32 %val
@@ -143,7 +155,9 @@ define i64 @test14(i64* %ptr) {
; PPC64LE-LABEL: test14:
; PPC64LE: # BB#0:
; PPC64LE-NEXT: ld 3, 0(3)
-; PPC64LE-NEXT: lwsync
+; PPC64LE-NEXT: cmpw 7, 3, 3
+; PPC64LE-NEXT: bne- 7, .+4
+; PPC64LE-NEXT: isync
; PPC64LE-NEXT: blr
%val = load atomic i64, i64* %ptr acquire, align 8
ret i64 %val
@@ -155,7 +169,9 @@ define i64 @test15(i64* %ptr) {
; PPC64LE-NEXT: sync
; PPC64LE-NEXT: ori 2, 2, 0
; PPC64LE-NEXT: ld 3, 0(3)
-; PPC64LE-NEXT: lwsync
+; PPC64LE-NEXT: cmpw 7, 3, 3
+; PPC64LE-NEXT: bne- 7, .+4
+; PPC64LE-NEXT: isync
; PPC64LE-NEXT: blr
%val = load atomic i64, i64* %ptr seq_cst, align 8
ret i64 %val
@@ -9544,3 +9560,35 @@ define i64 @test559(i64* %ptr, i64 %val)
%ret = atomicrmw umin i64* %ptr, i64 %val singlethread seq_cst
ret i64 %ret
}
+
+; The second load should never be scheduled before isync.
+define i32 @test_ordering0(i32* %ptr1, i32* %ptr2) {
+; PPC64LE-LABEL: test_ordering0:
+; PPC64LE: # BB#0:
+; PPC64LE-NEXT: lwz 4, 0(3)
+; PPC64LE-NEXT: cmpw 7, 4, 4
+; PPC64LE-NEXT: bne- 7, .+4
+; PPC64LE-NEXT: isync
+; PPC64LE-NEXT: lwz 3, 0(3)
+; PPC64LE-NEXT: add 3, 4, 3
+; PPC64LE-NEXT: blr
+ %val1 = load atomic i32, i32* %ptr1 acquire, align 4
+ %val2 = load i32, i32* %ptr1
+ %add = add i32 %val1, %val2
+ ret i32 %add
+}
+
+; The second store should never be scheduled before isync.
+define i32 @test_ordering1(i32* %ptr1, i32 %val1, i32* %ptr2) {
+; PPC64LE-LABEL: test_ordering1:
+; PPC64LE: # BB#0:
+; PPC64LE-NEXT: lwz 3, 0(3)
+; PPC64LE-NEXT: cmpw 7, 3, 3
+; PPC64LE-NEXT: bne- 7, .+4
+; PPC64LE-NEXT: isync
+; PPC64LE-NEXT: stw 4, 0(5)
+; PPC64LE-NEXT: blr
+ %val2 = load atomic i32, i32* %ptr1 acquire, align 4
+ store i32 %val1, i32* %ptr2
+ ret i32 %val2
+}
Modified: llvm/trunk/test/CodeGen/PowerPC/atomics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/atomics.ll?rev=303205&r1=303204&r2=303205&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/atomics.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/atomics.ll Tue May 16 15:18:06 2017
@@ -25,9 +25,12 @@ define i16 @load_i16_monotonic(i16* %mem
}
define i32 @load_i32_acquire(i32* %mem) {
; CHECK-LABEL: load_i32_acquire
-; CHECK: lwz
+; CHECK: lwz [[VAL:r[0-9]+]]
%val = load atomic i32, i32* %mem acquire, align 4
-; CHECK: lwsync
+; CHECK-PPC32: lwsync
+; CHECK-PPC64: cmpw [[CR:cr[0-9]+]], [[VAL]], [[VAL]]
+; CHECK-PPC64: bne- [[CR]], .+4
+; CHECK-PPC64: isync
ret i32 %val
}
define i64 @load_i64_seq_cst(i64* %mem) {
@@ -35,9 +38,12 @@ define i64 @load_i64_seq_cst(i64* %mem)
; CHECK: sync
; PPC32: __sync_
; PPC64-NOT: __sync_
-; PPC64: ld
+; PPC64: ld [[VAL:r[0-9]+]]
%val = load atomic i64, i64* %mem seq_cst, align 8
-; CHECK: lwsync
+; CHECK-PPC32: lwsync
+; CHECK-PPC64: cmpw [[CR:cr[0-9]+]], [[VAL]], [[VAL]]
+; CHECK-PPC64: bne- [[CR]], .+4
+; CHECK-PPC64: isync
ret i64 %val
}
More information about the llvm-commits
mailing list