[clang] [llvm] [HLSL][SPIR-V] implement SV_GroupID semantic lowering (PR #121521)

Thu Jan 2 12:45:00 PST 2025

llvmbot wrote:




@llvm/pr-subscribers-hlsl

Author: Zhengxing li (lizhengxing)

<details>
<summary>Changes</summary>

The HLSL `SV_GroupID` semantic attribute is lowered into the `@llvm.spv.group.id` intrinsic in LLVM IR for the SPIR-V target.

In the SPIR-V backend, this is now translated to a `WorkgroupId` builtin variable.

Fixes #118700, which is follow-up work to #70120.

---
Full diff: https://github.com/llvm/llvm-project/pull/121521.diff


6 Files Affected:

- (modified) clang/lib/CodeGen/CGHLSLRuntime.cpp (+1-1) 
- (modified) clang/lib/CodeGen/CGHLSLRuntime.h (+1) 
- (modified) clang/test/CodeGenHLSL/semantics/SV_GroupID.hlsl (+19-15) 
- (modified) llvm/include/llvm/IR/IntrinsicsSPIRV.td (+1) 
- (modified) llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp (+8) 
- (added) llvm/test/CodeGen/SPIRV/hlsl-intrinsics/SV_GroupID.ll (+82) 


``````````diff

diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp
index c354e58e15f4bb..5679bd71581795 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.cpp
+++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp
@@ -395,7 +395,7 @@ llvm::Value *CGHLSLRuntime::emitInputSemantic(IRBuilder<> &B,
     return buildVectorInput(B, GroupThreadIDIntrinsic, Ty);
   }
   if (D.hasAttr<HLSLSV_GroupIDAttr>()) {
-    llvm::Function *GroupIDIntrinsic = CGM.getIntrinsic(Intrinsic::dx_group_id);
+    llvm::Function *GroupIDIntrinsic = CGM.getIntrinsic(getGroupIdIntrinsic());
     return buildVectorInput(B, GroupIDIntrinsic, Ty);
   }
   assert(false && "Unhandled parameter attribute");
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h
index edb87f9d5efdf9..3d5724118611cb 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.h
+++ b/clang/lib/CodeGen/CGHLSLRuntime.h
@@ -87,6 +87,7 @@ class CGHLSLRuntime {
   GENERATE_HLSL_INTRINSIC_FUNCTION(Radians, radians)
   GENERATE_HLSL_INTRINSIC_FUNCTION(ThreadId, thread_id)
   GENERATE_HLSL_INTRINSIC_FUNCTION(GroupThreadId, thread_id_in_group)
+  GENERATE_HLSL_INTRINSIC_FUNCTION(GroupId, group_id)
   GENERATE_HLSL_INTRINSIC_FUNCTION(FDot, fdot)
   GENERATE_HLSL_INTRINSIC_FUNCTION(SDot, sdot)
   GENERATE_HLSL_INTRINSIC_FUNCTION(UDot, udot)
diff --git a/clang/test/CodeGenHLSL/semantics/SV_GroupID.hlsl b/clang/test/CodeGenHLSL/semantics/SV_GroupID.hlsl
index 5e09f0fe06d4e6..3aa054afc90451 100644
--- a/clang/test/CodeGenHLSL/semantics/SV_GroupID.hlsl
+++ b/clang/test/CodeGenHLSL/semantics/SV_GroupID.hlsl
@@ -1,32 +1,36 @@
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -x hlsl -emit-llvm -finclude-default-header -disable-llvm-passes -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -x hlsl -emit-llvm -finclude-default-header -disable-llvm-passes -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-DXIL -DTARGET=dx
+// RUN: %clang_cc1 -triple spirv-linux-vulkan-library -x hlsl -emit-llvm -finclude-default-header -disable-llvm-passes -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-SPIRV -DTARGET=spv
 
-// Make sure SV_GroupID translated into dx.group.id.
+// Make sure SV_GroupID translated into dx.group.id for directx target and spv.group.id for spirv target.
 
-// CHECK:  define void @foo()
-// CHECK:  %[[#ID:]] = call i32 @llvm.dx.group.id(i32 0)
-// CHECK:  call void @{{.*}}foo{{.*}}(i32 %[[#ID]])
+// CHECK:       define void @foo()
+// CHECK:       %[[#ID:]] = call i32 @llvm.[[TARGET]].group.id(i32 0)
+// CHECK-DXIL:       call void @{{.*}}foo{{.*}}(i32 %[[#ID]])
+// CHECK-SPIRV:      call spir_func void @{{.*}}foo{{.*}}(i32 %[[#ID]])
 [shader("compute")]
 [numthreads(8,8,1)]
 void foo(uint Idx : SV_GroupID) {}
 
-// CHECK:  define void @bar()
-// CHECK:  %[[#ID_X:]] = call i32 @llvm.dx.group.id(i32 0)
-// CHECK:  %[[#ID_X_:]] = insertelement <2 x i32> poison, i32 %[[#ID_X]], i64 0
-// CHECK:  %[[#ID_Y:]] = call i32 @llvm.dx.group.id(i32 1)
-// CHECK:  %[[#ID_XY:]] = insertelement <2 x i32> %[[#ID_X_]], i32 %[[#ID_Y]], i64 1
-// CHECK:  call void @{{.*}}bar{{.*}}(<2 x i32> %[[#ID_XY]])
+// CHECK:       define void @bar()
+// CHECK:       %[[#ID_X:]] = call i32 @llvm.[[TARGET]].group.id(i32 0)
+// CHECK:       %[[#ID_X_:]] = insertelement <2 x i32> poison, i32 %[[#ID_X]], i64 0
+// CHECK:       %[[#ID_Y:]] = call i32 @llvm.[[TARGET]].group.id(i32 1)
+// CHECK:       %[[#ID_XY:]] = insertelement <2 x i32> %[[#ID_X_]], i32 %[[#ID_Y]], i64 1
+// CHECK-DXIL:  call void @{{.*}}bar{{.*}}(<2 x i32> %[[#ID_XY]])
+// CHECK-SPIRV:  call spir_func void @{{.*}}bar{{.*}}(<2 x i32> %[[#ID_XY]])
 [shader("compute")]
 [numthreads(8,8,1)]
 void bar(uint2 Idx : SV_GroupID) {}
 
 // CHECK:  define void @test()
-// CHECK:  %[[#ID_X:]] = call i32 @llvm.dx.group.id(i32 0)
+// CHECK:  %[[#ID_X:]] = call i32 @llvm.[[TARGET]].group.id(i32 0)
 // CHECK:  %[[#ID_X_:]] = insertelement <3 x i32> poison, i32 %[[#ID_X]], i64 0
-// CHECK:  %[[#ID_Y:]] = call i32 @llvm.dx.group.id(i32 1)
+// CHECK:  %[[#ID_Y:]] = call i32 @llvm.[[TARGET]].group.id(i32 1)
 // CHECK:  %[[#ID_XY:]] = insertelement <3 x i32> %[[#ID_X_]], i32 %[[#ID_Y]], i64 1
-// CHECK:  %[[#ID_Z:]] = call i32 @llvm.dx.group.id(i32 2)
+// CHECK:  %[[#ID_Z:]] = call i32 @llvm.[[TARGET]].group.id(i32 2)
 // CHECK:  %[[#ID_XYZ:]] = insertelement <3 x i32> %[[#ID_XY]], i32 %[[#ID_Z]], i64 2
-// CHECK:  call void @{{.*}}test{{.*}}(<3 x i32> %[[#ID_XYZ]])
+// CHECK-DXIL:   call void @{{.*}}test{{.*}}(<3 x i32> %[[#ID_XYZ]])
+// CHECK-SPIRV:  call spir_func void @{{.*}}test{{.*}}(<3 x i32> %[[#ID_XYZ]])
 [shader("compute")]
 [numthreads(8,8,1)]
 void test(uint3 Idx : SV_GroupID) {}
diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
index bcff0f20b985d4..8ebce408ff1380 100644
--- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td
+++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
@@ -59,6 +59,7 @@ let TargetPrefix = "spv" in {
 
   // The following intrinsic(s) are mirrored from IntrinsicsDirectX.td for HLSL support.
   def int_spv_thread_id : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem, IntrWillReturn]>;
+  def int_spv_group_id : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem, IntrWillReturn]>;
   def int_spv_thread_id_in_group : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem, IntrWillReturn]>;
   def int_spv_all : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_any_ty], [IntrNoMem]>;
   def int_spv_any : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_any_ty], [IntrNoMem]>;
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index 289d5f31664872..0fa0986a10c693 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -2881,6 +2881,14 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg,
     // translated to a `LocalInvocationId` builtin variable
     return loadVec3BuiltinInputID(SPIRV::BuiltIn::LocalInvocationId, ResVReg,
                                   ResType, I);
+  case Intrinsic::spv_group_id:
+    // The HLSL SV_GroupId semantic is lowered to
+    // llvm.spv.group.id intrinsic in LLVM IR for SPIR-V backend.
+    //
+    // In SPIR-V backend, llvm.spv.group.id is now translated to a `WorkgroupId`
+    // builtin variable
+    return loadVec3BuiltinInputID(SPIRV::BuiltIn::WorkgroupId, ResVReg, ResType,
+                                  I);
   case Intrinsic::spv_fdot:
     return selectFloatDot(ResVReg, ResType, I);
   case Intrinsic::spv_udot:
diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/SV_GroupID.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/SV_GroupID.ll
new file mode 100644
index 00000000000000..1c218339ee1a1e
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/SV_GroupID.ll
@@ -0,0 +1,82 @@
+; RUN: llc -O0 -verify-machineinstrs -mtriple=spirv-vulkan-unknown %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-vulkan-unknown %s -o - -filetype=obj | spirv-val %}
+
+; This file generated from the following command:
+; clang -cc1 -triple spirv-vulkan-library -x hlsl -emit-llvm -disable-llvm-passes -finclude-default-header - -o - <<EOF
+; [shader("compute")]
+; [numthreads(1,1,1)]
+; void main(uint3 ID : SV_GroupID) {}
+; EOF
+
+; CHECK-DAG:        %[[#int:]] = OpTypeInt 32 0
+; CHECK-DAG:        %[[#v3int:]] = OpTypeVector %[[#int]] 3
+; CHECK-DAG:        %[[#ptr_Input_v3int:]] = OpTypePointer Input %[[#v3int]]
+; CHECK-DAG:        %[[#tempvar:]] = OpUndef %[[#v3int]]
+; CHECK-DAG:        %[[#WorkgroupId:]] = OpVariable %[[#ptr_Input_v3int]] Input
+
+; CHECK-DAG:        OpEntryPoint GLCompute {{.*}} %[[#WorkgroupId]]
+; CHECK-DAG:        OpName %[[#WorkgroupId]] "__spirv_BuiltInWorkgroupId"
+; CHECK-DAG:        OpDecorate %[[#WorkgroupId]] LinkageAttributes "__spirv_BuiltInWorkgroupId" Import
+; CHECK-DAG:        OpDecorate %[[#WorkgroupId]] BuiltIn WorkgroupId
+
+; ModuleID = '-'
+source_filename = "-"
+target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64-G1"
+target triple = "spirv-unknown-vulkan-library"
+
+; Function Attrs: alwaysinline convergent mustprogress norecurse nounwind
+define internal spir_func void @_Z4mainDv3_j(<3 x i32> noundef %ID) #0 {
+entry:
+  %0 = call token @llvm.experimental.convergence.entry()
+  %ID.addr = alloca <3 x i32>, align 16
+  store <3 x i32> %ID, ptr %ID.addr, align 16
+  ret void
+}
+
+; Function Attrs: convergent noinline norecurse
+define void @main() #1 {
+entry:
+  %0 = call token @llvm.experimental.convergence.entry()
+
+; CHECK:        %[[#load:]] = OpLoad %[[#v3int]] %[[#WorkgroupId]]
+; CHECK:        %[[#load0:]] = OpCompositeExtract %[[#int]] %[[#load]] 0
+  %1 = call i32 @llvm.spv.group.id(i32 0)
+
+; CHECK:        %[[#tempvar:]] = OpCompositeInsert %[[#v3int]] %[[#load0]] %[[#tempvar]]
+  %2 = insertelement <3 x i32> poison, i32 %1, i64 0
+
+; CHECK:        %[[#load:]] = OpLoad %[[#v3int]] %[[#WorkgroupId]]
+; CHECK:        %[[#load1:]] = OpCompositeExtract %[[#int]] %[[#load]] 1
+  %3 = call i32 @llvm.spv.group.id(i32 1)
+
+; CHECK:        %[[#tempvar:]] = OpCompositeInsert %[[#v3int]] %[[#load1]] %[[#tempvar]] 1
+  %4 = insertelement <3 x i32> %2, i32 %3, i64 1
+
+; CHECK:        %[[#load:]] = OpLoad %[[#v3int]] %[[#WorkgroupId]]
+; CHECK:        %[[#load2:]] = OpCompositeExtract %[[#int]] %[[#load]] 2
+  %5 = call i32 @llvm.spv.group.id(i32 2)
+
+; CHECK:        %[[#tempvar:]] = OpCompositeInsert %[[#v3int]] %[[#load2]] %[[#tempvar]] 2
+  %6 = insertelement <3 x i32> %4, i32 %5, i64 2
+
+  call spir_func void @_Z4mainDv3_j(<3 x i32> %6) [ "convergencectrl"(token %0) ]
+  ret void
+}
+
+; Function Attrs: convergent nocallback nofree nosync nounwind willreturn memory(none)
+declare token @llvm.experimental.convergence.entry() #2
+
+; Function Attrs: nounwind willreturn memory(none)
+declare i32 @llvm.spv.group.id(i32) #3
+
+attributes #0 = { alwaysinline convergent mustprogress norecurse nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #1 = { convergent noinline norecurse "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #2 = { convergent nocallback nofree nosync nounwind willreturn memory(none) }
+attributes #3 = { nounwind willreturn memory(none) }
+
+!llvm.module.flags = !{!0, !1}
+!llvm.ident = !{!2}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 4, !"dx.disable_optimizations", i32 1}
+!2 = !{!"clang version 20.0.0git (https://github.com/llvm/llvm-project.git 4075ddad7183e6f0b66e2c8cc7a03b461a8038e6)"}

``````````

</details>


https://github.com/llvm/llvm-project/pull/121521