[llvm] [SPIR-V] Add cl_khr_kernel_clock / SPV_KHR_shader_clock extension (PR #92771)
via llvm-commits
llvm-commits at lists.llvm.org
Mon May 20 08:25:48 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-spir-v
Author: Sven van Haastregt (svenvh)
<details>
<summary>Changes</summary>
Recognize `cl_khr_kernel_clock` builtins and translate them to `OpReadClockKHR` instructions. The `Scope` operand is deduced from the builtin function name.
spirv-val does not pass yet due to OpReadClockKHR only supporting the valid scopes for Vulkan (Device and Subgroup, but not Workgroup), so leave validation disabled with a TODO.
Provisional extension description: https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_API.html#cl_khr_kernel_clock .
Builtins: https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_C.html#kernel-clock-functions .
SPIR-V environment: https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_Env.html#_cl_khr_kernel_clock .
---
Full diff: https://github.com/llvm/llvm-project/pull/92771.diff
7 Files Affected:
- (modified) llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp (+35)
- (modified) llvm/lib/Target/SPIRV/SPIRVBuiltins.td (+9)
- (modified) llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp (+2)
- (modified) llvm/lib/Target/SPIRV/SPIRVInstrInfo.td (+5)
- (modified) llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp (+8)
- (modified) llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td (+1)
- (added) llvm/test/CodeGen/SPIRV/extensions/SPV_KHR_shader_clock/shader_clock.ll (+87)
``````````diff
diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
index 9fde26c900f51..424087f361a6a 100644
--- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
@@ -1118,6 +1118,39 @@ static bool generateGroupUniformInst(const SPIRV::IncomingCall *Call,
return true;
}
+static bool generateKernelClockInst(const SPIRV::IncomingCall *Call,
+ MachineIRBuilder &MIRBuilder,
+ SPIRVGlobalRegistry *GR) {
+ const SPIRV::DemangledBuiltin *Builtin = Call->Builtin;
+ MachineFunction &MF = MIRBuilder.getMF();
+ const auto *ST = static_cast<const SPIRVSubtarget *>(&MF.getSubtarget());
+ if (!ST->canUseExtension(SPIRV::Extension::SPV_KHR_shader_clock)) {
+ std::string DiagMsg = std::string(Builtin->Name) +
+ ": the builtin requires the following SPIR-V "
+ "extension: SPV_KHR_shader_clock";
+ report_fatal_error(DiagMsg.c_str(), false);
+ }
+
+ MachineRegisterInfo *MRI = MIRBuilder.getMRI();
+ Register ResultReg = Call->ReturnRegister;
+ MRI->setRegClass(ResultReg, &SPIRV::IDRegClass);
+
+ // Deduce the `Scope` operand from the builtin function name.
+ SPIRV::Scope::Scope ScopeArg =
+ StringSwitch<SPIRV::Scope::Scope>(Builtin->Name)
+ .EndsWith("device", SPIRV::Scope::Scope::Device)
+ .EndsWith("work_group", SPIRV::Scope::Scope::Workgroup)
+ .EndsWith("sub_group", SPIRV::Scope::Scope::Subgroup);
+ Register ScopeReg = buildConstantIntReg(ScopeArg, MIRBuilder, GR);
+
+ MIRBuilder.buildInstr(SPIRV::OpReadClockKHR)
+ .addDef(ResultReg)
+ .addUse(GR->getSPIRVTypeID(Call->ReturnType))
+ .addUse(ScopeReg);
+
+ return true;
+}
+
// These queries ask for a single size_t result for a given dimension index, e.g
// size_t get_global_id(uint dimindex). In SPIR-V, the builtins corresonding to
// these values are all vec3 types, so we need to extract the correct index or
@@ -2290,6 +2323,8 @@ std::optional<bool> lowerBuiltin(const StringRef DemangledCall,
return generateIntelSubgroupsInst(Call.get(), MIRBuilder, GR);
case SPIRV::GroupUniform:
return generateGroupUniformInst(Call.get(), MIRBuilder, GR);
+ case SPIRV::KernelClock:
+ return generateKernelClockInst(Call.get(), MIRBuilder, GR);
}
return false;
}
diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.td b/llvm/lib/Target/SPIRV/SPIRVBuiltins.td
index 564028547821e..692234c405ab6 100644
--- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.td
+++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.td
@@ -58,6 +58,7 @@ def LoadStore : BuiltinGroup;
def IntelSubgroups : BuiltinGroup;
def AtomicFloating : BuiltinGroup;
def GroupUniform : BuiltinGroup;
+def KernelClock : BuiltinGroup;
//===----------------------------------------------------------------------===//
// Class defining a demangled builtin record. The information in the record
@@ -952,6 +953,14 @@ defm : DemangledGroupBuiltin<"group_scan_exclusive_logical_xor", OnlyWork, OpGro
defm : DemangledGroupBuiltin<"group_scan_inclusive_logical_xor", OnlyWork, OpGroupLogicalXorKHR>;
defm : DemangledGroupBuiltin<"group_reduce_logical_xor", OnlyWork, OpGroupLogicalXorKHR>;
+// cl_khr_kernel_clock / SPV_KHR_shader_clock
+defm : DemangledNativeBuiltin<"clock_read_device", OpenCL_std, KernelClock, 0, 0, OpReadClockKHR>;
+defm : DemangledNativeBuiltin<"clock_read_work_group", OpenCL_std, KernelClock, 0, 0, OpReadClockKHR>;
+defm : DemangledNativeBuiltin<"clock_read_sub_group", OpenCL_std, KernelClock, 0, 0, OpReadClockKHR>;
+defm : DemangledNativeBuiltin<"clock_read_hilo_device", OpenCL_std, KernelClock, 0, 0, OpReadClockKHR>;
+defm : DemangledNativeBuiltin<"clock_read_hilo_work_group", OpenCL_std, KernelClock, 0, 0, OpReadClockKHR>;
+defm : DemangledNativeBuiltin<"clock_read_hilo_sub_group", OpenCL_std, KernelClock, 0, 0, OpReadClockKHR>;
+
//===----------------------------------------------------------------------===//
// Class defining an atomic instruction on floating-point numbers.
//
diff --git a/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp b/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp
index 691e6ee0e5829..752d71eddd99a 100644
--- a/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp
@@ -55,6 +55,8 @@ static const std::map<std::string, SPIRV::Extension::Extension>
SPIRV::Extension::Extension::SPV_INTEL_variable_length_array},
{"SPV_INTEL_function_pointers",
SPIRV::Extension::Extension::SPV_INTEL_function_pointers},
+ {"SPV_KHR_shader_clock",
+ SPIRV::Extension::Extension::SPV_KHR_shader_clock},
};
bool SPIRVExtensionsParser::parse(cl::Option &O, llvm::StringRef ArgName,
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td
index 151d0ec1fe569..a6bedab6d4ee5 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td
+++ b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td
@@ -802,6 +802,11 @@ def OpGroupNonUniformRotateKHR: Op<4431, (outs ID:$res),
(ins TYPE:$type, ID:$scope, ID:$value, ID:$delta, variable_ops),
"$res = OpGroupNonUniformRotateKHR $type $scope $value $delta">;
+// SPV_KHR_shader_clock
+def OpReadClockKHR: Op<5056, (outs ID:$res),
+ (ins TYPE:$type, ID:$scope),
+ "$res = OpReadClockKHR $type $scope">;
+
// 3.49.7, Constant-Creation Instructions
// - SPV_INTEL_function_pointers
diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
index 235f947901d83..cbe7c5ca30570 100644
--- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
@@ -1117,6 +1117,14 @@ void addInstrRequirements(const MachineInstr &MI,
Reqs.addCapability(SPIRV::Capability::GroupUniformArithmeticKHR);
}
break;
+ case SPIRV::OpReadClockKHR:
+ if (!ST.canUseExtension(SPIRV::Extension::SPV_KHR_shader_clock))
+ report_fatal_error("OpReadClockKHR instruction requires the "
+ "following SPIR-V extension: SPV_KHR_shader_clock",
+ false);
+ Reqs.addExtension(SPIRV::Extension::SPV_KHR_shader_clock);
+ Reqs.addCapability(SPIRV::Capability::ShaderClockKHR);
+ break;
case SPIRV::OpFunctionPointerCallINTEL:
if (ST.canUseExtension(SPIRV::Extension::SPV_INTEL_function_pointers)) {
Reqs.addExtension(SPIRV::Extension::SPV_INTEL_function_pointers);
diff --git a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td
index 31e19ad8630cd..50d327179fa84 100644
--- a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td
+++ b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td
@@ -413,6 +413,7 @@ defm ImageGatherBiasLodAMD : CapabilityOperand<5009, 0, 0, [], [Shader]>;
defm FragmentMaskAMD : CapabilityOperand<5010, 0, 0, [], [Shader]>;
defm StencilExportEXT : CapabilityOperand<5013, 0, 0, [], [Shader]>;
defm ImageReadWriteLodAMD : CapabilityOperand<5015, 0, 0, [], [Shader]>;
+defm ShaderClockKHR : CapabilityOperand<5055, 0, 0, [SPV_KHR_shader_clock], []>;
defm SampleMaskOverrideCoverageNV : CapabilityOperand<5249, 0, 0, [], [SampleRateShading]>;
defm GeometryShaderPassthroughNV : CapabilityOperand<5251, 0, 0, [], [Geometry]>;
defm ShaderViewportIndexLayerEXT : CapabilityOperand<5254, 0, 0, [], [MultiViewport]>;
diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_KHR_shader_clock/shader_clock.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_KHR_shader_clock/shader_clock.ll
new file mode 100644
index 0000000000000..2c984c7017f95
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_KHR_shader_clock/shader_clock.ll
@@ -0,0 +1,87 @@
+; RUN: not llc -O0 -mtriple=spirv32-unknown-unknown %s -o %t.spvt 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR
+; RUN: llc -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=+SPV_KHR_shader_clock %s -o - | FileCheck %s
+; TODO: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_KHR_shader_clock %s -o - -filetype=obj | spirv-val %}
+
+; CHECK-ERROR: LLVM ERROR: clock_read_device: the builtin requires the following SPIR-V extension: SPV_KHR_shader_clock
+
+; CHECK: OpCapability ShaderClockKHR
+; CHECK: OpExtension "SPV_KHR_shader_clock"
+; CHECK-DAG: [[uint:%[a-z0-9_]+]] = OpTypeInt 32
+; CHECK-DAG: [[ulong:%[a-z0-9_]+]] = OpTypeInt 64
+; CHECK-DAG: [[v2uint:%[a-z0-9_]+]] = OpTypeVector [[uint]] 2
+; CHECK-DAG: [[uint_1:%[a-z0-9_]+]] = OpConstant [[uint]] 1
+; CHECK-DAG: [[uint_2:%[a-z0-9_]+]] = OpConstant [[uint]] 2
+; CHECK-DAG: [[uint_3:%[a-z0-9_]+]] = OpConstant [[uint]] 3
+; CHECK: OpReadClockKHR [[ulong]] [[uint_1]]
+; CHECK: OpReadClockKHR [[ulong]] [[uint_2]]
+; CHECK: OpReadClockKHR [[ulong]] [[uint_3]]
+; CHECK: OpReadClockKHR [[v2uint]] [[uint_1]]
+; CHECK: OpReadClockKHR [[v2uint]] [[uint_2]]
+; CHECK: OpReadClockKHR [[v2uint]] [[uint_3]]
+
+; ModuleID = '<stdin>'
+target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-G1"
+target triple = "spir-unknown-unknown"
+
+; Function Attrs: convergent norecurse nounwind
+define dso_local spir_kernel void @test_clocks(ptr addrspace(1) nocapture noundef writeonly align 8 %out64, ptr addrspace(1) nocapture noundef writeonly align 8 %outv2) local_unnamed_addr #0 !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !6 !kernel_arg_type_qual !7 {
+entry:
+ %call = tail call spir_func i64 @_Z17clock_read_devicev() #2
+ store i64 %call, ptr addrspace(1) %out64, align 8, !tbaa !8
+ %call1 = tail call spir_func i64 @_Z21clock_read_work_groupv() #2
+ %arrayidx2 = getelementptr inbounds i8, ptr addrspace(1) %out64, i32 8
+ store i64 %call1, ptr addrspace(1) %arrayidx2, align 8, !tbaa !8
+ %call3 = tail call spir_func i64 @_Z20clock_read_sub_groupv() #2
+ %arrayidx4 = getelementptr inbounds i8, ptr addrspace(1) %out64, i32 16
+ store i64 %call3, ptr addrspace(1) %arrayidx4, align 8, !tbaa !8
+ %call5 = tail call spir_func <2 x i32> @_Z22clock_read_hilo_devicev() #2
+ store <2 x i32> %call5, ptr addrspace(1) %outv2, align 8, !tbaa !12
+ %call7 = tail call spir_func <2 x i32> @_Z26clock_read_hilo_work_groupv() #2
+ %arrayidx8 = getelementptr inbounds i8, ptr addrspace(1) %outv2, i32 8
+ store <2 x i32> %call7, ptr addrspace(1) %arrayidx8, align 8, !tbaa !12
+ %call9 = tail call spir_func <2 x i32> @_Z25clock_read_hilo_sub_groupv() #2
+ %arrayidx10 = getelementptr inbounds i8, ptr addrspace(1) %outv2, i32 16
+ store <2 x i32> %call9, ptr addrspace(1) %arrayidx10, align 8, !tbaa !12
+ ret void
+}
+
+; Function Attrs: convergent nounwind
+declare spir_func i64 @_Z17clock_read_devicev() local_unnamed_addr #1
+
+; Function Attrs: convergent nounwind
+declare spir_func i64 @_Z21clock_read_work_groupv() local_unnamed_addr #1
+
+; Function Attrs: convergent nounwind
+declare spir_func i64 @_Z20clock_read_sub_groupv() local_unnamed_addr #1
+
+; Function Attrs: convergent nounwind
+declare spir_func <2 x i32> @_Z22clock_read_hilo_devicev() local_unnamed_addr #1
+
+; Function Attrs: convergent nounwind
+declare spir_func <2 x i32> @_Z26clock_read_hilo_work_groupv() local_unnamed_addr #1
+
+; Function Attrs: convergent nounwind
+declare spir_func <2 x i32> @_Z25clock_read_hilo_sub_groupv() local_unnamed_addr #1
+
+attributes #0 = { convergent norecurse nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" }
+attributes #1 = { convergent nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #2 = { convergent nounwind }
+
+!llvm.module.flags = !{!0}
+!opencl.ocl.version = !{!1}
+!opencl.spir.version = !{!1}
+!llvm.ident = !{!2}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 2, i32 0}
+!2 = !{!"clang version 19.0.0git"}
+!3 = !{i32 1, i32 1}
+!4 = !{!"none", !"none"}
+!5 = !{!"ulong*", !"uint2*"}
+!6 = !{!"ulong*", !"uint __attribute__((ext_vector_type(2)))*"}
+!7 = !{!"", !""}
+!8 = !{!9, !9, i64 0}
+!9 = !{!"long", !10, i64 0}
+!10 = !{!"omnipotent char", !11, i64 0}
+!11 = !{!"Simple C/C++ TBAA"}
+!12 = !{!10, !10, i64 0}
``````````
</details>
https://github.com/llvm/llvm-project/pull/92771
More information about the llvm-commits
mailing list