[llvm] [AMDGPU] Implement readcyclecounter for GFX12 (PR #76965)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 4 06:58:53 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Jay Foad (jayfoad)
<details>
<summary>Changes</summary>
---
Full diff: https://github.com/llvm/llvm-project/pull/76965.diff
5 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/AMDGPU.td (+9-1)
- (modified) llvm/lib/Target/AMDGPU/GCNSubtarget.h (+5)
- (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+42)
- (modified) llvm/lib/Target/AMDGPU/SIInstructions.td (+6)
- (modified) llvm/test/CodeGen/AMDGPU/readcyclecounter.ll (+12)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 060fb66d38f7bc..86d2d6cf3c5ebc 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -761,6 +761,12 @@ def FeatureShaderCyclesRegister : SubtargetFeature<"shader-cycles-register",
"Has SHADER_CYCLES hardware register"
>;
+def FeatureShaderCyclesHiLoRegisters : SubtargetFeature<"shader-cycles-hi-lo-registers",
+ "HasShaderCyclesHiLoRegisters",
+ "true",
+ "Has SHADER_CYCLES_HI/LO hardware registers"
+>;
+
def FeatureMadMacF32Insts : SubtargetFeature<"mad-mac-f32-insts",
"HasMadMacF32Insts",
"true",
@@ -1469,7 +1475,7 @@ def FeatureISAVersion12 : FeatureSet<
FeatureNSAEncoding,
FeaturePartialNSAEncoding,
FeatureWavefrontSize32,
- FeatureShaderCyclesRegister,
+ FeatureShaderCyclesHiLoRegisters,
FeatureArchitectedFlatScratch,
FeatureAtomicFaddRtnInsts,
FeatureAtomicFaddNoRtnInsts,
@@ -1970,6 +1976,8 @@ def HasSMemTimeInst : Predicate<"Subtarget->hasSMemTimeInst()">,
def HasShaderCyclesRegister : Predicate<"Subtarget->hasShaderCyclesRegister()">,
AssemblerPredicate<(all_of FeatureShaderCyclesRegister)>;
+def HasShaderCyclesHiLoRegisters : Predicate<"Subtarget->hasShaderCyclesHiLoRegisters()">;
+
def HasFP8Insts : Predicate<"Subtarget->hasFP8Insts()">,
AssemblerPredicate<(all_of FeatureFP8Insts)>;
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 91a70930326955..4fef389eeacbea 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -176,6 +176,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool HasGetWaveIdInst = false;
bool HasSMemTimeInst = false;
bool HasShaderCyclesRegister = false;
+ bool HasShaderCyclesHiLoRegisters = false;
bool HasVOP3Literal = false;
bool HasNoDataDepHazard = false;
bool FlatAddressSpace = false;
@@ -819,6 +820,10 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
return HasShaderCyclesRegister;
}
+ bool hasShaderCyclesHiLoRegisters() const {
+ return HasShaderCyclesHiLoRegisters;
+ }
+
bool hasVOP3Literal() const {
return HasVOP3Literal;
}
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 4f4bc45e49b43e..041355716b0825 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -4827,6 +4827,48 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
MI.eraseFromParent();
return BB;
}
+ case AMDGPU::GET_SHADERCYCLESHILO: {
+ assert(MF->getSubtarget<GCNSubtarget>().hasShaderCyclesHiLoRegisters());
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ DebugLoc DL = MI.getDebugLoc();
+ // The algorithm is:
+ //
+ // hi1 = getreg(SHADER_CYCLES_HI)
+ // lo1 = getreg(SHADER_CYCLES_LO)
+ // hi2 = getreg(SHADER_CYCLES_HI)
+ //
+ // If hi1 == hi2 then there was no overflow and the result is hi2:lo1.
+ // Otherwise there was overflow and the result is hi2:0. In both cases the
+ // result should represent the actual time at some point during the sequence
+ // of three getregs.
+ Register RegHi1 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_GETREG_B32), RegHi1)
+ .addImm(AMDGPU::Hwreg::encodeHwreg(AMDGPU::Hwreg::ID_SHADER_CYCLES_HI,
+ 0, 32));
+ Register RegLo1 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_GETREG_B32), RegLo1)
+ .addImm(
+ AMDGPU::Hwreg::encodeHwreg(AMDGPU::Hwreg::ID_SHADER_CYCLES, 0, 32));
+ Register RegHi2 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_GETREG_B32), RegHi2)
+ .addImm(AMDGPU::Hwreg::encodeHwreg(AMDGPU::Hwreg::ID_SHADER_CYCLES_HI,
+ 0, 32));
+ BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_CMP_EQ_U32))
+ .addReg(RegHi1)
+ .addReg(RegHi2);
+ Register RegLo = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_CSELECT_B32), RegLo)
+ .addReg(RegLo1)
+ .addImm(0);
+ BuildMI(*BB, MI, DL, TII->get(AMDGPU::REG_SEQUENCE))
+ .add(MI.getOperand(0))
+ .addReg(RegLo)
+ .addImm(AMDGPU::sub0)
+ .addReg(RegHi2)
+ .addImm(AMDGPU::sub1);
+ MI.eraseFromParent();
+ return BB;
+ }
case AMDGPU::SI_INDIRECT_SRC_V1:
case AMDGPU::SI_INDIRECT_SRC_V2:
case AMDGPU::SI_INDIRECT_SRC_V4:
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index f9bc623abcd04b..55471107d41b53 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -316,6 +316,12 @@ def S_USUBO_PSEUDO : SPseudoInstSI <
(outs SReg_32:$sdst, SSrc_i1:$scc_out), (ins SSrc_b32:$src0, SSrc_b32:$src1)
>;
+let OtherPredicates = [HasShaderCyclesHiLoRegisters] in
+def GET_SHADERCYCLESHILO : SPseudoInstSI<
+ (outs SReg_64:$sdst), (ins),
+ [(set SReg_64:$sdst, (i64 (readcyclecounter)))]
+>;
+
} // End usesCustomInserter = 1, Defs = [SCC]
let usesCustomInserter = 1 in {
diff --git a/llvm/test/CodeGen/AMDGPU/readcyclecounter.ll b/llvm/test/CodeGen/AMDGPU/readcyclecounter.ll
index 7e0a486c8191e3..17b3fdc04ec934 100644
--- a/llvm/test/CodeGen/AMDGPU/readcyclecounter.ll
+++ b/llvm/test/CodeGen/AMDGPU/readcyclecounter.ll
@@ -8,12 +8,19 @@
; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -check-prefixes=GETREG,GETREG-GISEL -check-prefix=GCN %s
; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=GETREG,GETREG-SDAG -check-prefix=GCN %s
; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=GETREG,GETREG-GISEL -check-prefix=GCN %s
+; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX12 %s
+; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX12 %s
declare i64 @llvm.readcyclecounter() #0
; GCN-LABEL: {{^}}test_readcyclecounter:
; MEMTIME-DAG: s_memtime s{{\[[0-9]+:[0-9]+\]}}
; GCN-DAG: s_load_{{dwordx2|b64}}
+; GFX12: s_getreg_b32 [[HI1:s[0-9]+]], hwreg(HW_REG_SHADER_CYCLES_HI)
+; GFX12: s_getreg_b32 [[LO1:s[0-9]+]], hwreg(HW_REG_SHADER_CYCLES_LO)
+; GFX12: s_getreg_b32 [[HI2:s[0-9]+]], hwreg(HW_REG_SHADER_CYCLES_HI)
+; GFX12: s_cmp_eq_u32 [[HI1]], [[HI2]]
+; GFX12: s_cselect_b32 {{s[0-9]+}}, [[LO1]], 0
; GCN-DAG: lgkmcnt
; MEMTIME: store_dwordx2
; SIVI-NOT: lgkmcnt
@@ -43,8 +50,13 @@ define amdgpu_kernel void @test_readcyclecounter(ptr addrspace(1) %out) #0 {
;
; GCN-LABEL: {{^}}test_readcyclecounter_smem:
; MEMTIME-DAG: s_memtime
+; GFX12: s_getreg_b32 [[HI1:s[0-9]+]], hwreg(HW_REG_SHADER_CYCLES_HI)
+; GFX12: s_getreg_b32 [[LO1:s[0-9]+]], hwreg(HW_REG_SHADER_CYCLES_LO)
+; GFX12: s_getreg_b32 [[HI2:s[0-9]+]], hwreg(HW_REG_SHADER_CYCLES_HI)
; GCN-DAG: s_load_{{dword|b32|b64}}
; GETREG-DAG: s_getreg_b32 s{{[0-9]+}}, hwreg(HW_REG_SHADER_CYCLES, 0, 20)
+; GFX12: s_cmp_eq_u32 [[HI1]], [[HI2]]
+; GFX12: s_cselect_b32 {{s[0-9]+}}, [[LO1]], 0
define amdgpu_cs i32 @test_readcyclecounter_smem(ptr addrspace(4) inreg %in) #0 {
%cycle0 = call i64 @llvm.readcyclecounter()
%in.v = load i64, ptr addrspace(4) %in
``````````
</details>
https://github.com/llvm/llvm-project/pull/76965
More information about the llvm-commits mailing list