[clang] [llvm] [LLVM] Add `__builtin_readsteadycounter` intrinsic and buiiltin for realtime clocks (PR #81331)
Joseph Huber via llvm-commits
llvm-commits at lists.llvm.org
Sat Feb 10 05:24:23 PST 2024
https://github.com/jhuber6 updated https://github.com/llvm/llvm-project/pull/81331
>From 109939223e7944472363134d72a223524e1e3f0a Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Fri, 9 Feb 2024 16:13:42 -0600
Subject: [PATCH] [WIP][LLVM] Add `__builtin_readsteadycounter` intrinsic and
buiiltin
Summary:
This patch adds a new intrinsic and builtin function mirroring the
existing `__builtin_readcyclecounter`. The difference is that this
implementation targets a separate counter that some targets have which
returns a fixed frequency clock that can be used to determine elapsed
time, this is different compared to the cycle counter which often has
variable frequency. This is currently only valid for the NVPTX and
AMDGPU targets.
---
clang/docs/LanguageExtensions.rst | 31 +++++++++++++++++++
clang/include/clang/Basic/Builtins.td | 6 ++++
clang/lib/CodeGen/CGBuiltin.cpp | 4 +++
clang/test/CodeGen/builtins.c | 6 ++++
llvm/include/llvm/CodeGen/ISDOpcodes.h | 6 ++++
llvm/include/llvm/IR/Intrinsics.td | 2 ++
llvm/include/llvm/Support/TargetOpcodes.def | 3 ++
llvm/include/llvm/Target/GenericOpcodes.td | 6 ++++
.../Target/GlobalISel/SelectionDAGCompat.td | 1 +
.../include/llvm/Target/TargetSelectionDAG.td | 3 ++
llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 2 ++
llvm/lib/CodeGen/IntrinsicLowering.cpp | 6 ++++
llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 6 ++--
.../SelectionDAG/LegalizeIntegerTypes.cpp | 7 +++--
llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 2 +-
.../SelectionDAG/SelectionDAGBuilder.cpp | 8 +++++
.../SelectionDAG/SelectionDAGDumper.cpp | 1 +
llvm/lib/CodeGen/TargetLoweringBase.cpp | 3 ++
.../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 2 ++
.../Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 1 +
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 4 +++
llvm/lib/Target/AMDGPU/SMInstructions.td | 14 +++++++++
llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 3 ++
llvm/lib/Target/NVPTX/NVPTXInstrInfo.td | 1 -
llvm/lib/Target/NVPTX/NVPTXIntrinsics.td | 4 +++
llvm/test/CodeGen/AMDGPU/readsteadycounter.ll | 24 ++++++++++++++
llvm/test/CodeGen/NVPTX/intrinsics.ll | 12 +++++++
27 files changed, 161 insertions(+), 7 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/readsteadycounter.ll
diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index e91156837290f7..4cc73599f9bae0 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -2764,6 +2764,37 @@ Query for this feature with ``__has_builtin(__builtin_readcyclecounter)``. Note
that even if present, its use may depend on run-time privilege or other OS
controlled state.
+``__builtin_readsteadycounter``
+------------------------------
+
+``__builtin_readsteadycounter`` is used to access the fixed frequency counter
+register (or a similar steady-rate clock) on those targets that support it.
+The function is similar to ``__builtin_readcyclecounter`` above except that the
+frequency is fixed, making it suitable for measuring elapsed time.
+
+**Syntax**:
+
+.. code-block:: c++
+
+ __builtin_readsteadycounter()
+
+**Example of Use**:
+
+.. code-block:: c++
+
+ unsigned long long t0 = __builtin_readsteadycounter();
+ do_something();
+ unsigned long long t1 = __builtin_readsteadycounter();
+ unsigned long long secs_to_do_something = (t1 - t0) / tick_rate;
+
+**Description**:
+
+The ``__builtin_readsteadycounter()`` builtin returns the frequency counter value.
+When not supported by the target, the return value is always zero. This builtin
+takes no arguments and produces an unsigned long long result.
+
+Query for this feature with ``__has_builtin(__builtin_readsteadycounter)``.
+
``__builtin_dump_struct``
-------------------------
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index 31a2bdeb2d3e5e..193d5851f9f29f 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -1110,6 +1110,12 @@ def ReadCycleCounter : Builtin {
let Prototype = "unsigned long long int()";
}
+def ReadSteadyCounter : Builtin {
+ let Spellings = ["__builtin_readsteadycounter"];
+ let Attributes = [NoThrow];
+ let Prototype = "unsigned long long int()";
+}
+
def Trap : Builtin {
let Spellings = ["__builtin_trap"];
let Attributes = [NoThrow, NoReturn];
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index a7a410dab1a018..ee0b7504769622 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -3443,6 +3443,10 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
Function *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);
return RValue::get(Builder.CreateCall(F));
}
+ case Builtin::BI__builtin_readsteadycounter: {
+ Function *F = CGM.getIntrinsic(Intrinsic::readsteadycounter);
+ return RValue::get(Builder.CreateCall(F));
+ }
case Builtin::BI__builtin___clear_cache: {
Value *Begin = EmitScalarExpr(E->getArg(0));
Value *End = EmitScalarExpr(E->getArg(1));
diff --git a/clang/test/CodeGen/builtins.c b/clang/test/CodeGen/builtins.c
index ed03233b6f1a96..88282120283b8a 100644
--- a/clang/test/CodeGen/builtins.c
+++ b/clang/test/CodeGen/builtins.c
@@ -496,6 +496,12 @@ long long test_builtin_readcyclecounter(void) {
return __builtin_readcyclecounter();
}
+// CHECK-LABEL: define{{.*}} i64 @test_builtin_readsteadycounter
+long long test_builtin_readsteadycounter(void) {
+ // CHECK: call i64 @llvm.readsteadycounter()
+ return __builtin_readsteadycounter();
+}
+
/// __builtin_launder should be a NOP in C since there are no vtables.
// CHECK-LABEL: define{{.*}} void @test_builtin_launder
void test_builtin_launder(int *p) {
diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
index 349d1286c8dc4f..8cb0bc9fd98133 100644
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -1179,6 +1179,12 @@ enum NodeType {
/// counter-like register (or other high accuracy low latency clock source).
READCYCLECOUNTER,
+ /// READSTEADYCOUNTER - This corresponds to the readfixedcounter intrinsic.
+ /// It has the same semantics as the READCYCLECOUNTER implementation except
+ /// that the result is the content of the architecture-specific fixed
+ /// frequency counter suitable for measuring elapsed time.
+ READSTEADYCOUNTER,
+
/// HANDLENODE node - Used as a handle for various purposes.
HANDLENODE,
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index 3c19c7b063652c..4becdd71cd440d 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -870,6 +870,8 @@ def int_pcmarker : DefaultAttrsIntrinsic<[], [llvm_i32_ty]>;
def int_readcyclecounter : DefaultAttrsIntrinsic<[llvm_i64_ty]>;
+def int_readsteadycounter : DefaultAttrsIntrinsic<[llvm_i64_ty]>;
+
// The assume intrinsic is marked InaccessibleMemOnly so that proper control
// dependencies will be maintained.
def int_assume : DefaultAttrsIntrinsic<
diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def
index abb237083d254e..42cb854d950502 100644
--- a/llvm/include/llvm/Support/TargetOpcodes.def
+++ b/llvm/include/llvm/Support/TargetOpcodes.def
@@ -352,6 +352,9 @@ HANDLE_TARGET_OPCODE(G_INTRINSIC_ROUNDEVEN)
/// INTRINSIC readcyclecounter
HANDLE_TARGET_OPCODE(G_READCYCLECOUNTER)
+/// INTRINSIC readsteadycounter
+HANDLE_TARGET_OPCODE(G_READSTEADYCOUNTER)
+
/// Generic load (including anyext load)
HANDLE_TARGET_OPCODE(G_LOAD)
diff --git a/llvm/include/llvm/Target/GenericOpcodes.td b/llvm/include/llvm/Target/GenericOpcodes.td
index 2c73b67f9e1af0..19197f50d9dff9 100644
--- a/llvm/include/llvm/Target/GenericOpcodes.td
+++ b/llvm/include/llvm/Target/GenericOpcodes.td
@@ -1101,6 +1101,12 @@ def G_READCYCLECOUNTER : GenericInstruction {
let hasSideEffects = true;
}
+def G_READSTEADYCOUNTER : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins);
+ let hasSideEffects = true;
+}
+
//------------------------------------------------------------------------------
// Memory ops
//------------------------------------------------------------------------------
diff --git a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
index f792237203b431..f89658ef08ff04 100644
--- a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
+++ b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
@@ -167,6 +167,7 @@ def : GINodeEquiv<G_FMAXNUM_IEEE, fmaxnum_ieee>;
def : GINodeEquiv<G_FMAXIMUM, fmaximum>;
def : GINodeEquiv<G_FMINIMUM, fminimum>;
def : GINodeEquiv<G_READCYCLECOUNTER, readcyclecounter>;
+def : GINodeEquiv<G_READSTEADYCOUNTER, readsteadycounter>;
def : GINodeEquiv<G_ROTR, rotr>;
def : GINodeEquiv<G_ROTL, rotl>;
def : GINodeEquiv<G_LROUND, lround>;
diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td
index 22360353790dbc..5f8bf0d448105d 100644
--- a/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -657,6 +657,9 @@ def prefetch : SDNode<"ISD::PREFETCH" , SDTPrefetch,
def readcyclecounter : SDNode<"ISD::READCYCLECOUNTER", SDTIntLeaf,
[SDNPHasChain, SDNPSideEffect]>;
+def readsteadycounter : SDNode<"ISD::READSTEADYCOUNTER", SDTIntLeaf,
+ [SDNPHasChain, SDNPSideEffect]>;
+
def membarrier : SDNode<"ISD::MEMBARRIER", SDTNone,
[SDNPHasChain, SDNPSideEffect]>;
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index c1d8e890a66edb..311dd9d9739a6d 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -1885,6 +1885,8 @@ unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) {
return TargetOpcode::G_INTRINSIC_TRUNC;
case Intrinsic::readcyclecounter:
return TargetOpcode::G_READCYCLECOUNTER;
+ case Intrinsic::readsteadycounter:
+ return TargetOpcode::G_READSTEADYCOUNTER;
case Intrinsic::ptrmask:
return TargetOpcode::G_PTRMASK;
case Intrinsic::lrint:
diff --git a/llvm/lib/CodeGen/IntrinsicLowering.cpp b/llvm/lib/CodeGen/IntrinsicLowering.cpp
index 61920a0e04ab59..fe450cba4a3332 100644
--- a/llvm/lib/CodeGen/IntrinsicLowering.cpp
+++ b/llvm/lib/CodeGen/IntrinsicLowering.cpp
@@ -312,6 +312,12 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
CI->replaceAllUsesWith(ConstantInt::get(Type::getInt64Ty(Context), 0));
break;
}
+ case Intrinsic::readsteadycounter: {
+ errs() << "WARNING: this target does not support the llvm.readsteadycounter"
+ << " intrinsic. It is being lowered to a constant 0\n";
+ CI->replaceAllUsesWith(ConstantInt::get(Type::getInt64Ty(Context), 0));
+ break;
+ }
case Intrinsic::dbg_declare:
case Intrinsic::dbg_label:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 892bfbd62f0d02..252b6e9997a710 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -1127,8 +1127,9 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
Action = TargetLowering::Custom;
break;
case ISD::READCYCLECOUNTER:
- // READCYCLECOUNTER returns an i64, even if type legalization might have
- // expanded that to several smaller types.
+ case ISD::READSTEADYCOUNTER:
+ // READCYCLECOUNTER and READSTEADYCOUNTER return a i64, even if type
+ // legalization might have expanded that to several smaller types.
Action = TLI.getOperationAction(Node->getOpcode(), MVT::i64);
break;
case ISD::READ_REGISTER:
@@ -3080,6 +3081,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Results.push_back(Node->getOperand(0));
break;
case ISD::READCYCLECOUNTER:
+ case ISD::READSTEADYCOUNTER:
// If the target didn't expand this, just return 'zero' and preserve the
// chain.
Results.append(Node->getNumValues() - 1,
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 39b7e061554141..274b9e7a045b8c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -2648,7 +2648,8 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::LLRINT: ExpandIntRes_XROUND_XRINT(N, Lo, Hi); break;
case ISD::LOAD: ExpandIntRes_LOAD(cast<LoadSDNode>(N), Lo, Hi); break;
case ISD::MUL: ExpandIntRes_MUL(N, Lo, Hi); break;
- case ISD::READCYCLECOUNTER: ExpandIntRes_READCYCLECOUNTER(N, Lo, Hi); break;
+ case ISD::READCYCLECOUNTER:
+ case ISD::READSTEADYCOUNTER: ExpandIntRes_READCOUNTER(N, Lo, Hi); break;
case ISD::SDIV: ExpandIntRes_SDIV(N, Lo, Hi); break;
case ISD::SIGN_EXTEND: ExpandIntRes_SIGN_EXTEND(N, Lo, Hi); break;
case ISD::SIGN_EXTEND_INREG: ExpandIntRes_SIGN_EXTEND_INREG(N, Lo, Hi); break;
@@ -4026,8 +4027,8 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N,
Lo, Hi);
}
-void DAGTypeLegalizer::ExpandIntRes_READCYCLECOUNTER(SDNode *N, SDValue &Lo,
- SDValue &Hi) {
+void DAGTypeLegalizer::ExpandIntRes_READCOUNTER(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
SDLoc DL(N);
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDVTList VTs = DAG.getVTList(NVT, NVT, MVT::Other);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 09f0bca8b8611e..91149871628574 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -439,7 +439,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
void ExpandIntRes_CTPOP (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_CTTZ (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_LOAD (LoadSDNode *N, SDValue &Lo, SDValue &Hi);
- void ExpandIntRes_READCYCLECOUNTER (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_READCOUNTER (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_SIGN_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_SIGN_EXTEND_INREG (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_TRUNCATE (SDNode *N, SDValue &Lo, SDValue &Hi);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 5ce1013f30fd1b..28664b2ed9052d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -6781,6 +6781,14 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
DAG.setRoot(Res.getValue(1));
return;
}
+ case Intrinsic::readsteadycounter: {
+ SDValue Op = getRoot();
+ Res = DAG.getNode(ISD::READSTEADYCOUNTER, sdl,
+ DAG.getVTList(MVT::i64, MVT::Other), Op);
+ setValue(&I, Res);
+ DAG.setRoot(Res.getValue(1));
+ return;
+ }
case Intrinsic::bitreverse:
setValue(&I, DAG.getNode(ISD::BITREVERSE, sdl,
getValue(I.getArgOperand(0)).getValueType(),
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index a28d834f0522f2..45db2667e80959 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -104,6 +104,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::ATOMIC_STORE: return "AtomicStore";
case ISD::PCMARKER: return "PCMarker";
case ISD::READCYCLECOUNTER: return "ReadCycleCounter";
+ case ISD::READSTEADYCOUNTER: return "ReadFixedTimer";
case ISD::SRCVALUE: return "SrcValue";
case ISD::MDNODE_SDNODE: return "MDNode";
case ISD::EntryToken: return "EntryToken";
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index d8302ba2b42608..dc766928e5dc94 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -964,6 +964,9 @@ void TargetLoweringBase::initActions() {
// Most targets also ignore the @llvm.readcyclecounter intrinsic.
setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Expand);
+ // Most targets also ignore the @llvm.readsteadycounter intrinsic.
+ setOperationAction(ISD::READSTEADYCOUNTER, MVT::i64, Expand);
+
// ConstantFP nodes default to expand. Targets can either change this to
// Legal, in which case all fp constants are legal, or use isFPImmLegal()
// to optimize expansions for certain constants.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 97952de3e6a37b..53eab74a3f5728 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -1957,6 +1957,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
getActionDefinitionsBuilder(G_READCYCLECOUNTER)
.legalFor({S64});
+ getActionDefinitionsBuilder(G_READSTEADYCOUNTER).legalFor({S64});
+
getActionDefinitionsBuilder(G_FENCE)
.alwaysLegal();
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 5323e4fc58de80..b174d57bd57656 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -4051,6 +4051,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case AMDGPU::G_CONSTANT:
case AMDGPU::G_GLOBAL_VALUE:
case AMDGPU::G_BLOCK_ADDR:
+ case AMDGPU::G_READSTEADYCOUNTER:
case AMDGPU::G_READCYCLECOUNTER: {
unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index a64a9e608f2173..f0db352cce4853 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -468,6 +468,10 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
// On SI this is s_memtime and s_memrealtime on VI.
setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
+
+ if (Subtarget->hasSMemRealTime() ||
+ Subtarget->getGeneration() >= AMDGPUSubtarget::GFX11)
+ setOperationAction(ISD::READSTEADYCOUNTER, MVT::i64, Legal);
setOperationAction({ISD::TRAP, ISD::DEBUGTRAP}, MVT::Other, Custom);
if (Subtarget->has16BitInsts()) {
diff --git a/llvm/lib/Target/AMDGPU/SMInstructions.td b/llvm/lib/Target/AMDGPU/SMInstructions.td
index f3096962e2f3e8..29651a8390399c 100644
--- a/llvm/lib/Target/AMDGPU/SMInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SMInstructions.td
@@ -1068,6 +1068,20 @@ def : GCNPat <
}
} // let OtherPredicates = [HasShaderCyclesRegister]
+let OtherPredicates = [HasSMemRealTime] in {
+def : GCNPat <
+ (i64 (readsteadycounter)),
+ (S_MEMREALTIME)
+>;
+} // let OtherPredicates = [HasSMemRealTime]
+
+let SubtargetPredicate = isGFX11Plus in {
+def : GCNPat <
+ (i64 (readsteadycounter)),
+ (S_SENDMSG_RTN_B64 (i32 /*MSG_RTN_GET_REALTIME=*/0x83))
+>;
+} // let SubtargetPredicate = [isGFX11Plus]
+
def i32imm_zero : TImmLeaf <i32, [{
return Imm == 0;
}]>;
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index c7bc623a88e1b9..d7062808085362 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -489,6 +489,9 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f16, Expand);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f16, Expand);
+ if (STI.getSmVersion() >= 30 && STI.getPTXVersion() > 31)
+ setOperationAction(ISD::READSTEADYCOUNTER, MVT::i64, Legal);
+
setFP16OperationAction(ISD::SETCC, MVT::f16, Legal, Promote);
setFP16OperationAction(ISD::SETCC, MVT::v2f16, Legal, Expand);
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index 365afc6bd8c617..da99e3ec125345 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -3759,7 +3759,6 @@ def CALL_PROTOTYPE :
include "NVPTXIntrinsics.td"
-
//-----------------------------------
// Notes
//-----------------------------------
diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index 2330d7213c26dc..7bc40b8c82dfc1 100644
--- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -6372,12 +6372,16 @@ def INT_PTX_SREG_LANEMASK_GE :
def INT_PTX_SREG_LANEMASK_GT :
PTX_READ_SREG_R32<"lanemask_gt", int_nvvm_read_ptx_sreg_lanemask_gt>;
+let hasSideEffects = 1 in {
def INT_PTX_SREG_CLOCK :
PTX_READ_SREG_R32<"clock", int_nvvm_read_ptx_sreg_clock>;
def INT_PTX_SREG_CLOCK64 :
PTX_READ_SREG_R64<"clock64", int_nvvm_read_ptx_sreg_clock64>;
def INT_PTX_SREG_GLOBALTIMER :
PTX_READ_SREG_R64<"globaltimer", int_nvvm_read_ptx_sreg_globaltimer>;
+}
+
+def: Pat <(i64 (readsteadycounter)), (INT_PTX_SREG_GLOBALTIMER)>;
def INT_PTX_SREG_PM0 : PTX_READ_SREG_R32<"pm0", int_nvvm_read_ptx_sreg_pm0>;
def INT_PTX_SREG_PM1 : PTX_READ_SREG_R32<"pm1", int_nvvm_read_ptx_sreg_pm1>;
diff --git a/llvm/test/CodeGen/AMDGPU/readsteadycounter.ll b/llvm/test/CodeGen/AMDGPU/readsteadycounter.ll
new file mode 100644
index 00000000000000..15f664c98182ae
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/readsteadycounter.ll
@@ -0,0 +1,24 @@
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX700
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX900
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX900
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX1100
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX1100
+
+declare i64 @llvm.readsteadycounter() #0
+
+; GCN-LABEL: {{^}}test_readsteadycounter:
+; GFX700: s_mov_b32 s[[REG:[0-9]+]], 0
+; GFX900: s_memrealtime s[[[LO:[0-9]+]]:[[HI:[0-9]+]]]
+; GFX900: s_memrealtime s[[[LO:[0-9]+]]:[[HI:[0-9]+]]]
+; GFX1100: s_sendmsg_rtn_b64 s[[[LO:[0-9]+]]:[[HI:[0-9]+]]], sendmsg(MSG_RTN_GET_REALTIME)
+; GFX1100: s_sendmsg_rtn_b64 s[[[LO:[0-9]+]]:[[HI:[0-9]+]]], sendmsg(MSG_RTN_GET_REALTIME)
+define amdgpu_kernel void @test_readsteadycounter(ptr addrspace(1) %out) #0 {
+ %cycle0 = call i64 @llvm.readsteadycounter()
+ store volatile i64 %cycle0, ptr addrspace(1) %out
+
+ %cycle1 = call i64 @llvm.readsteadycounter()
+ store volatile i64 %cycle1, ptr addrspace(1) %out
+ ret void
+}
+
+attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/NVPTX/intrinsics.ll b/llvm/test/CodeGen/NVPTX/intrinsics.ll
index 2994f600d45c71..90702405df6a48 100644
--- a/llvm/test/CodeGen/NVPTX/intrinsics.ll
+++ b/llvm/test/CodeGen/NVPTX/intrinsics.ll
@@ -151,6 +151,17 @@ define i64 @test_globaltimer() {
ret i64 %ret
}
+; CHECK-LABEL: test_steadycounter
+define i64 @test_steadycounter() {
+; CHECK: mov.u64 %r{{.*}}, %globaltimer;
+ %a = tail call i64 @llvm.readsteadycounter()
+; CHECK: mov.u64 %r{{.*}}, %globaltimer;
+ %b = tail call i64 @llvm.readsteadycounter()
+ %ret = add i64 %a, %b
+; CHECK: ret
+ ret i64 %ret
+}
+
declare float @llvm.fabs.f32(float)
declare double @llvm.fabs.f64(double)
declare float @llvm.nvvm.sqrt.f(float)
@@ -161,6 +172,7 @@ declare i16 @llvm.ctpop.i16(i16)
declare i32 @llvm.ctpop.i32(i32)
declare i64 @llvm.ctpop.i64(i64)
+declare i64 @llvm.readsteadycounter()
declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
declare i32 @llvm.nvvm.read.ptx.sreg.clock()
declare i64 @llvm.nvvm.read.ptx.sreg.clock64()
More information about the llvm-commits
mailing list