[llvm] d0e4203 - [CSSPGO] MIR target-independent pseudo instruction for pseudo-probe intrinsic
Hongtao Yu via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 20 10:53:10 PST 2020
Author: Hongtao Yu
Date: 2020-11-20T10:52:43-08:00
New Revision: d0e42037bf0ce110cc8c6e9b536687fa40e90b99
URL: https://github.com/llvm/llvm-project/commit/d0e42037bf0ce110cc8c6e9b536687fa40e90b99
DIFF: https://github.com/llvm/llvm-project/commit/d0e42037bf0ce110cc8c6e9b536687fa40e90b99.diff
LOG: [CSSPGO] MIR target-independent pseudo instruction for pseudo-probe intrinsic
This change introduces a MIR target-independent pseudo instruction corresponding to the IR intrinsic llvm.pseudoprobe for pseudo-probe block instrumentation. Please refer to https://reviews.llvm.org/D86193 for the whole story.
An `llvm.pseudoprobe` intrinsic call will be lowered into a target-independent operation named `PSEUDO_PROBE`. Given the following instrumented IR,
```
define internal void @foo2(i32 %x, void (i32)* %f) !dbg !4 {
bb0:
%cmp = icmp eq i32 %x, 0
call void @llvm.pseudoprobe(i64 837061429793323041, i64 1)
br i1 %cmp, label %bb1, label %bb2
bb1:
call void @llvm.pseudoprobe(i64 837061429793323041, i64 2)
br label %bb3
bb2:
call void @llvm.pseudoprobe(i64 837061429793323041, i64 3)
br label %bb3
bb3:
call void @llvm.pseudoprobe(i64 837061429793323041, i64 4)
ret void
}
```
the corresponding MIR is shown below. Note that block `bb3` is duplicated into `bb1` and `bb2` where its probe is duplicated too. This allows for an accurate execution count to be collected for `bb3`, which is basically the sum of the counts of `bb1` and `bb2`.
```
bb.0.bb0:
frame-setup PUSH64r undef $rax, implicit-def $rsp, implicit $rsp
TEST32rr killed renamable $edi, renamable $edi, implicit-def $eflags
PSEUDO_PROBE 837061429793323041, 1, 0
$edi = MOV32ri 1, debug-location !13; test.c:0
JCC_1 %bb.1, 4, implicit $eflags
bb.2.bb2:
PSEUDO_PROBE 837061429793323041, 3, 0
PSEUDO_PROBE 837061429793323041, 4, 0
$rax = frame-destroy POP64r implicit-def $rsp, implicit $rsp
RETQ
bb.1.bb1:
PSEUDO_PROBE 837061429793323041, 2, 0
PSEUDO_PROBE 837061429793323041, 4, 0
$rax = frame-destroy POP64r implicit-def $rsp, implicit $rsp
RETQ
```
The target op PSEUDO_PROBE will be converted into a piece of binary data by the object emitter with no machine instructions generated. This is done in a different patch.
Reviewed By: wmi
Differential Revision: https://reviews.llvm.org/D86495
Added:
llvm/test/Transforms/SampleProfile/pseudo-probe.mir
Modified:
llvm/include/llvm/CodeGen/ISDOpcodes.h
llvm/include/llvm/CodeGen/MachineInstr.h
llvm/include/llvm/CodeGen/SelectionDAG.h
llvm/include/llvm/CodeGen/SelectionDAGNodes.h
llvm/include/llvm/Support/TargetOpcodes.def
llvm/include/llvm/Target/Target.td
llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
index 269bb14508b8..2cacb572a0e1 100644
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -1106,6 +1106,10 @@ enum NodeType {
/// known nonzero constant. The only operand here is the chain.
GET_DYNAMIC_AREA_OFFSET,
+ /// Pseudo probe for AutoFDO, as a place holder in a basic block to improve
+ /// the sample counts quality.
+ PSEUDO_PROBE,
+
/// VSCALE(IMM) - Returns the runtime scaling factor used to calculate the
/// number of elements within a scalable vector. IMM is a constant integer
/// multiplier that is applied to the runtime value.
diff --git a/llvm/include/llvm/CodeGen/MachineInstr.h b/llvm/include/llvm/CodeGen/MachineInstr.h
index 6247f2a2c9b2..6bbe2d03f9e5 100644
--- a/llvm/include/llvm/CodeGen/MachineInstr.h
+++ b/llvm/include/llvm/CodeGen/MachineInstr.h
@@ -1261,6 +1261,7 @@ class MachineInstr
case TargetOpcode::DBG_LABEL:
case TargetOpcode::LIFETIME_START:
case TargetOpcode::LIFETIME_END:
+ case TargetOpcode::PSEUDO_PROBE:
return true;
}
}
diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h
index 42fc296a682c..8966e7f51dd9 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -1205,6 +1205,12 @@ class SelectionDAG {
SDValue getLifetimeNode(bool IsStart, const SDLoc &dl, SDValue Chain,
int FrameIndex, int64_t Size, int64_t Offset = -1);
+ /// Creates a PseudoProbeSDNode with function GUID `Guid` and
+ /// the index of the block `Index` it is probing, as well as the attributes
+ /// `attr` of the probe.
+ SDValue getPseudoProbeNode(const SDLoc &Dl, SDValue Chain, uint64_t Guid,
+ uint64_t Index, uint32_t Attr);
+
/// Create a MERGE_VALUES node from the given operands.
SDValue getMergeValues(ArrayRef<SDValue> Ops, const SDLoc &dl);
diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
index eaad25c508ab..83158e240594 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -1749,6 +1749,32 @@ class LifetimeSDNode : public SDNode {
}
};
+/// This SDNode is used for PSEUDO_PROBE values, which are the function guid and
+/// the index of the basic block being probed. A pseudo probe serves as a place
+/// holder and will be removed at the end of compilation. It does not have any
+/// operand because we do not want the instruction selection to deal with any.
+class PseudoProbeSDNode : public SDNode {
+ friend class SelectionDAG;
+ uint64_t Guid;
+ uint64_t Index;
+ uint32_t Attributes;
+
+ PseudoProbeSDNode(unsigned Opcode, unsigned Order, const DebugLoc &Dl,
+ SDVTList VTs, uint64_t Guid, uint64_t Index, uint32_t Attr)
+ : SDNode(Opcode, Order, Dl, VTs), Guid(Guid), Index(Index),
+ Attributes(Attr) {}
+
+public:
+ uint64_t getGuid() const { return Guid; }
+ uint64_t getIndex() const { return Index; }
+ uint32_t getAttributes() const { return Attributes; }
+
+ // Methods to support isa and dyn_cast
+ static bool classof(const SDNode *N) {
+ return N->getOpcode() == ISD::PSEUDO_PROBE;
+ }
+};
+
class JumpTableSDNode : public SDNode {
friend class SelectionDAG;
diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def
index 38eb51b84ba7..a63d40484089 100644
--- a/llvm/include/llvm/Support/TargetOpcodes.def
+++ b/llvm/include/llvm/Support/TargetOpcodes.def
@@ -110,6 +110,9 @@ HANDLE_TARGET_OPCODE(BUNDLE)
HANDLE_TARGET_OPCODE(LIFETIME_START)
HANDLE_TARGET_OPCODE(LIFETIME_END)
+/// Pseudo probe
+HANDLE_TARGET_OPCODE(PSEUDO_PROBE)
+
/// A Stackmap instruction captures the location of live variables at its
/// position in the instruction stream. It is followed by a shadow of bytes
/// that must lie within the function and not contain another stackmap.
diff --git a/llvm/include/llvm/Target/Target.td b/llvm/include/llvm/Target/Target.td
index 96fbfcc1f381..9664f70c7a02 100644
--- a/llvm/include/llvm/Target/Target.td
+++ b/llvm/include/llvm/Target/Target.td
@@ -1145,6 +1145,13 @@ def LIFETIME_END : StandardPseudoInstruction {
let AsmString = "LIFETIME_END";
let hasSideEffects = false;
}
+def PSEUDO_PROBE : StandardPseudoInstruction {
+ let OutOperandList = (outs);
+ let InOperandList = (ins i64imm:$guid, i64imm:$index, i8imm:$type, i32imm:$attr);
+ let AsmString = "PSEUDO_PROBE";
+ let hasSideEffects = 1;
+}
+
def STACKMAP : StandardPseudoInstruction {
let OutOperandList = (outs);
let InOperandList = (ins i64imm:$id, i32imm:$nbytes, variable_ops);
diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 95272d56f697..2b208cee1716 100644
--- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -1124,6 +1124,20 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
break;
}
+ case ISD::PSEUDO_PROBE: {
+ unsigned TarOp = TargetOpcode::PSEUDO_PROBE;
+ auto Guid = cast<PseudoProbeSDNode>(Node)->getGuid();
+ auto Index = cast<PseudoProbeSDNode>(Node)->getIndex();
+ auto Attr = cast<PseudoProbeSDNode>(Node)->getAttributes();
+
+ BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TarOp))
+ .addImm(Guid)
+ .addImm(Index)
+ .addImm(0) // 0 for block probes
+ .addImm(Attr);
+ break;
+ }
+
case ISD::INLINEASM:
case ISD::INLINEASM_BR: {
unsigned NumOps = Node->getNumOperands();
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 2ae6f1cbec17..489651e987ac 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -572,6 +572,11 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
ID.AddInteger(cast<LifetimeSDNode>(N)->getOffset());
}
break;
+ case ISD::PSEUDO_PROBE:
+ ID.AddInteger(cast<PseudoProbeSDNode>(N)->getGuid());
+ ID.AddInteger(cast<PseudoProbeSDNode>(N)->getIndex());
+ ID.AddInteger(cast<PseudoProbeSDNode>(N)->getAttributes());
+ break;
case ISD::JumpTable:
case ISD::TargetJumpTable:
ID.AddInteger(cast<JumpTableSDNode>(N)->getIndex());
@@ -6883,6 +6888,30 @@ SDValue SelectionDAG::getLifetimeNode(bool IsStart, const SDLoc &dl,
return V;
}
+SDValue SelectionDAG::getPseudoProbeNode(const SDLoc &Dl, SDValue Chain,
+ uint64_t Guid, uint64_t Index,
+ uint32_t Attr) {
+ const unsigned Opcode = ISD::PSEUDO_PROBE;
+ const auto VTs = getVTList(MVT::Other);
+ SDValue Ops[] = {Chain};
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, VTs, Ops);
+ ID.AddInteger(Guid);
+ ID.AddInteger(Index);
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, Dl, IP))
+ return SDValue(E, 0);
+
+ auto *N = newSDNode<PseudoProbeSDNode>(
+ Opcode, Dl.getIROrder(), Dl.getDebugLoc(), VTs, Guid, Index, Attr);
+ createOperands(N, Ops);
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
/// InferPointerInfo - If the specified ptr/offset is a frame index, infer a
/// MachinePointerInfo record from it. This is particularly useful because the
/// code generator has many cases where it doesn't bother passing in a
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 82056aafb496..959bb52cb223 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -6590,6 +6590,14 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
}
return;
}
+ case Intrinsic::pseudoprobe: {
+ auto Guid = cast<ConstantInt>(I.getArgOperand(0))->getZExtValue();
+ auto Index = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
+ auto Attr = cast<ConstantInt>(I.getArgOperand(2))->getZExtValue();
+ Res = DAG.getPseudoProbeNode(sdl, getRoot(), Guid, Index, Attr);
+ DAG.setRoot(Res);
+ return;
+ }
case Intrinsic::invariant_start:
// Discard region information.
setValue(&I, DAG.getUNDEF(TLI.getPointerTy(DAG.getDataLayout())));
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index df35f057072e..17a4a5cbf02a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -396,6 +396,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::DEBUGTRAP: return "debugtrap";
case ISD::LIFETIME_START: return "lifetime.start";
case ISD::LIFETIME_END: return "lifetime.end";
+ case ISD::PSEUDO_PROBE:
+ return "pseudoprobe";
case ISD::GC_TRANSITION_START: return "gc_transition.start";
case ISD::GC_TRANSITION_END: return "gc_transition.end";
case ISD::GET_DYNAMIC_AREA_OFFSET: return "get.dynamic.area.offset";
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 3f3eb354bee3..6c73842414cb 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -2808,6 +2808,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
case ISD::ANNOTATION_LABEL:
case ISD::LIFETIME_START:
case ISD::LIFETIME_END:
+ case ISD::PSEUDO_PROBE:
NodeToMatch->setNodeId(-1); // Mark selected.
return;
case ISD::AssertSext:
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe.mir b/llvm/test/Transforms/SampleProfile/pseudo-probe.mir
new file mode 100644
index 000000000000..8175a4769c85
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe.mir
@@ -0,0 +1,29 @@
+
+# REQUIRES: x86-registered-target
+# Ensure llc can read and parse MIR pseudo probe operations.
+# RUN: llc -O0 -mtriple x86_64-- -run-pass none %s -o - | FileCheck %s
+
+# CHECK: PSEUDO_PROBE 6699318081062747564, 1, 0, 0
+# CHECK: PSEUDO_PROBE 6699318081062747564, 3, 0, 0
+# CHECK: PSEUDO_PROBE 6699318081062747564, 4, 0, 0
+# CHECK: PSEUDO_PROBE 6699318081062747564, 2, 0, 0
+# CHECK: PSEUDO_PROBE 6699318081062747564, 4, 0, 0
+
+name: foo
+body: |
+ bb.0:
+ TEST32rr killed renamable $edi, renamable $edi, implicit-def $eflags
+ PSEUDO_PROBE 6699318081062747564, 1, 0, 0
+ JCC_1 %bb.1, 4, implicit $eflags
+
+ bb.2:
+ PSEUDO_PROBE 6699318081062747564, 3, 0, 0
+ PSEUDO_PROBE 6699318081062747564, 4, 0, 0
+ RETQ
+
+ bb.1:
+ PSEUDO_PROBE 6699318081062747564, 2, 0, 0
+ PSEUDO_PROBE 6699318081062747564, 4, 0, 0
+ RETQ
+
+...
More information about the llvm-commits
mailing list