[clang] [llvm] [AMDGPU] Add `wave_id` and `wave_shuffle` Clang builtins. (PR #179492)
Alex Voicu via cfe-commits
cfe-commits at lists.llvm.org
Tue Feb 3 10:48:35 PST 2026
https://github.com/AlexVlx updated https://github.com/llvm/llvm-project/pull/179492
>From edf668446bdf50c27f8ec01ada9f7ab67157083f Mon Sep 17 00:00:00 2001
From: Alex Voicu <alexandru.voicu at amd.com>
Date: Tue, 3 Feb 2026 16:19:51 +0000
Subject: [PATCH 1/4] Add `wave_id` and `wave_shuffle` Clang builtins.
---
clang/include/clang/Basic/BuiltinsAMDGPU.td | 9 +++++++
clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp | 4 +++
clang/lib/Sema/SemaAMDGPU.cpp | 25 +++++++++++++++++++
clang/lib/Sema/SemaChecking.cpp | 4 ++-
.../CodeGenOpenCL/builtins-amdgcn-gfx12.cl | 16 ++++++++++++
clang/test/CodeGenOpenCL/builtins-amdgcn.cl | 21 ++++++++++++++++
6 files changed, 78 insertions(+), 1 deletion(-)
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.td b/clang/include/clang/Basic/BuiltinsAMDGPU.td
index 1950757097fc6..a9acc1544ad53 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.td
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.td
@@ -67,6 +67,8 @@ def __builtin_amdgcn_mbcnt_lo : AMDGPUBuiltin<"unsigned int(unsigned int, unsign
def __builtin_amdgcn_s_memtime : AMDGPUBuiltin<"uint64_t()", [], "s-memtime-inst">;
+def __builtin_amdgcn_wave_id : AMDGPUBuiltin<"int32_t()", [Const], "architected-sgprs">;
+
//===----------------------------------------------------------------------===//
// Instruction builtins.
//===----------------------------------------------------------------------===//
@@ -413,6 +415,13 @@ def __builtin_amdgcn_wave_reduce_fsub_f64 : AMDGPUBuiltin<"double(double, _Const
def __builtin_amdgcn_wave_reduce_fmin_f64 : AMDGPUBuiltin<"double(double, _Constant int32_t)", [Const]>;
def __builtin_amdgcn_wave_reduce_fmax_f64 : AMDGPUBuiltin<"double(double, _Constant int32_t)", [Const]>;
+//===----------------------------------------------------------------------===//
+// Wave Shuffle builtins.
+//===----------------------------------------------------------------------===//
+
+// This is an overloaded builtin modelled after the atomic ones
+def __builtin_amdgcn_wave_shuffle : AMDGPUBuiltin<"void(...)", [Const, CustomTypeChecking]>;
+
//===----------------------------------------------------------------------===//
// R600-NI only builtins.
//===----------------------------------------------------------------------===//
diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
index a096ed27a788e..619c9b4be9090 100644
--- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
@@ -449,6 +449,10 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
llvm::Function *F = CGM.getIntrinsic(IID, {Value->getType()});
return Builder.CreateCall(F, {Value, Strategy});
}
+ case AMDGPU::BI__builtin_amdgcn_wave_shuffle:
+ // TODO: can we unify this with wave_reduce?
+ return emitBuiltinWithOneOverloadedType<2>(*this, E,
+ Intrinsic::amdgcn_wave_shuffle);
case AMDGPU::BI__builtin_amdgcn_div_scale:
case AMDGPU::BI__builtin_amdgcn_div_scalef: {
// Translate from the intrinsics's struct return to the builtin's out
diff --git a/clang/lib/Sema/SemaAMDGPU.cpp b/clang/lib/Sema/SemaAMDGPU.cpp
index 4261e1849133f..d5403f22eb7bb 100644
--- a/clang/lib/Sema/SemaAMDGPU.cpp
+++ b/clang/lib/Sema/SemaAMDGPU.cpp
@@ -296,6 +296,31 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
}
return false;
}
+ case AMDGPU::BI__builtin_amdgcn_wave_shuffle: {
+ Expr *Val = TheCall->getArg(0);
+ QualType ValTy = Val->getType();
+
+ if ((!ValTy->isIntegerType() && !ValTy->isFloatingType()) ||
+ SemaRef.getASTContext().getTypeSize(ValTy) > 32)
+ return Diag(Val->getExprLoc(), diag::err_builtin_invalid_arg_type)
+ << Val << /*scalar=*/1 << /*'int'=*/4 << /*floating point=*/2
+ << ValTy;
+
+ Expr *Idx = TheCall->getArg(1);
+ QualType IdxTy = Idx->getType();
+ if (!IdxTy->isIntegerType())
+ return Diag(Idx->getExprLoc(), diag::err_typecheck_expect_int) << IdxTy;
+ if (SemaRef.getASTContext().getTypeSize(IdxTy) > 32)
+ return Diag(Idx->getExprLoc(), diag::err_builtin_invalid_arg_type)
+ << Idx << /*scalar=*/1 << /*'int'=*/4 << /*floating point=*/0
+ << IdxTy;
+
+ // Resolve the overload here, now that we know that the invocation is
+ // correct: the intrinsic returns the type of the value argument.
+ TheCall->setType(ValTy);
+
+ return false;
+ }
default:
return false;
}
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index e2e1b37572364..9858264aa042d 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -2100,8 +2100,10 @@ bool Sema::CheckTSBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID,
case llvm::Triple::spirv:
case llvm::Triple::spirv32:
case llvm::Triple::spirv64:
- if (TI.getTriple().getOS() != llvm::Triple::OSType::AMDHSA)
+ if (TI.getTriple().getVendor() != llvm::Triple::VendorType::AMD)
return SPIRV().CheckSPIRVBuiltinFunctionCall(TI, BuiltinID, TheCall);
+ else
+ return AMDGPU().CheckAMDGCNBuiltinFunctionCall(BuiltinID, TheCall);
return false;
case llvm::Triple::systemz:
return SystemZ().CheckSystemZBuiltinFunctionCall(BuiltinID, TheCall);
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl
index 8c02616780182..d39c4180178ad 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl
@@ -317,3 +317,19 @@ void test_ds_bpermute_fi_b32(global int* out, int a, int b)
{
*out = __builtin_amdgcn_ds_bpermute_fi_b32(a, b);
}
+
+__attribute__((target("architected-sgprs")))
+// CHECK-LABEL: @test_wave_id(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
+// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.wave.id()
+// CHECK-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[TMP0]], ptr addrspace(1) [[TMP1]], align 4
+// CHECK-NEXT: ret void
+//
+void test_wave_id(global int* out)
+{
+ *out = __builtin_amdgcn_wave_id();
+}
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
index 376105cb6594c..4755cd32a2e2c 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
@@ -937,6 +937,27 @@ void test_wave_reduce_max_u64_dpp(global int* out, long in)
*out = __builtin_amdgcn_wave_reduce_max_u64(in, 2);
}
+// CHECK-LABEL: @test_wave_shuffle_u32
+// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.shuffle.i32
+void test_wave_shuffle_u32(global unsigned* out, unsigned in, int idx)
+{
+ *out = __builtin_amdgcn_wave_shuffle(in, idx);
+}
+
+// CHECK-LABEL: @test_wave_shuffle_i32
+// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.shuffle.i32
+void test_wave_shuffle_i32(global int* out, int in, int idx)
+{
+ *out = __builtin_amdgcn_wave_shuffle(in, idx);
+}
+
+// CHECK-LABEL: @test_wave_shuffle_f32
+// CHECK: {{.*}}call{{.*}} float @llvm.amdgcn.wave.shuffle.f32
+void test_wave_shuffle_f32(global float* out, float in, int idx)
+{
+ *out = __builtin_amdgcn_wave_shuffle(in, idx);
+}
+
// CHECK-LABEL: @test_s_barrier
// CHECK: {{.*}}call{{.*}} void @llvm.amdgcn.s.barrier(
void test_s_barrier()
>From deb0a0d3e8f39604aeca1c5fc148b90f6a93d4ba Mon Sep 17 00:00:00 2001
From: Alex Voicu <alexandru.voicu at amd.com>
Date: Tue, 3 Feb 2026 16:25:23 +0000
Subject: [PATCH 2/4] Fix formatting.
---
clang/lib/Sema/SemaAMDGPU.cpp | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/clang/lib/Sema/SemaAMDGPU.cpp b/clang/lib/Sema/SemaAMDGPU.cpp
index d5403f22eb7bb..8fce0a56bc4f9 100644
--- a/clang/lib/Sema/SemaAMDGPU.cpp
+++ b/clang/lib/Sema/SemaAMDGPU.cpp
@@ -303,8 +303,8 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
if ((!ValTy->isIntegerType() && !ValTy->isFloatingType()) ||
SemaRef.getASTContext().getTypeSize(ValTy) > 32)
return Diag(Val->getExprLoc(), diag::err_builtin_invalid_arg_type)
- << Val << /*scalar=*/1 << /*'int'=*/4 << /*floating point=*/2
- << ValTy;
+ << Val << /*scalar=*/1 << /*'int'=*/4 << /*floating point=*/2
+ << ValTy;
Expr *Idx = TheCall->getArg(1);
QualType IdxTy = Idx->getType();
@@ -312,8 +312,8 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
return Diag(Idx->getExprLoc(), diag::err_typecheck_expect_int) << IdxTy;
if (SemaRef.getASTContext().getTypeSize(IdxTy) > 32)
return Diag(Idx->getExprLoc(), diag::err_builtin_invalid_arg_type)
- << Idx << /*scalar=*/1 << /*'int'=*/4 << /*floating point=*/0
- << IdxTy;
+ << Idx << /*scalar=*/1 << /*'int'=*/4 << /*floating point=*/0
+ << IdxTy;
// Resolve the overload here, now that we know that the invocation is
// correct: the intrinsic returns the type of the value argument.
>From 2dc9e3b4f5a5b8209768e60d11f7138956d5c13c Mon Sep 17 00:00:00 2001
From: Alex Voicu <alexandru.voicu at amd.com>
Date: Tue, 3 Feb 2026 17:28:21 +0000
Subject: [PATCH 3/4] Add tests for Sema failure + missing update.
---
clang/test/SemaOpenCL/builtins-amdgcn-error-gfx12.cl | 2 ++
clang/test/SemaOpenCL/builtins-amdgcn-error.cl | 7 +++++++
llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 1 +
3 files changed, 10 insertions(+)
diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx12.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx12.cl
index 34887a65021c3..09afb7bc12017 100644
--- a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx12.cl
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx12.cl
@@ -13,4 +13,6 @@ typedef unsigned int uint;
void test(global uint* out, uint a, uint b, uint c) {
*out = __builtin_amdgcn_permlane16_var(a, b, c, 1, 1); // expected-error {{'__builtin_amdgcn_permlane16_var' needs target feature gfx12-insts}}
*out = __builtin_amdgcn_permlanex16_var(a, b, c, 1, 1); // expected-error {{'__builtin_amdgcn_permlanex16_var' needs target feature gfx12-insts}}
+ (void)__builtin_amdgcn_wave_id(); // expected-error {{'__builtin_amdgcn_wave_id' needs target feature architected-sgprs}}
}
+
diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error.cl
index eb1a86bdcdeb0..12b9645463f3a 100644
--- a/clang/test/SemaOpenCL/builtins-amdgcn-error.cl
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-error.cl
@@ -229,3 +229,10 @@ void test_atomic_dec64() {
__INT64_TYPE__ signedVal = 15;
signedVal = __builtin_amdgcn_atomic_dec64(&signedVal, signedVal, __ATOMIC_ACQUIRE, ""); // expected-warning {{passing '__private long *' to parameter of type 'volatile __private unsigned long *' converts between pointers to integer types with different sign}}
}
+
+void test_wave_shuffle(double d, int i, long long lli) {
+ struct S { int x; } s;
+ int x = __builtin_amdgcn_wave_shuffle(lli, i); // expected-error {{'lli' argument must be a scalar 'int' or 16 or 32 bit floating-point type (was '__private long long')}}
+ int y = __builtin_amdgcn_wave_shuffle(i, lli); // expected-error {{'lli' argument must be a scalar 'int' type (was '__private long long')}}
+ float z = __builtin_amdgcn_wave_shuffle(s, i); // expected-error {{'s' argument must be a scalar 'int' or 16 or 32 bit floating-point type (was '__private struct S')}}
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index a8eba9ed126b7..cff37bb42965a 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -3140,6 +3140,7 @@ def int_amdgcn_ds_read_tr16_b64 : AMDGPULoadIntrinsic<local_ptr_ty>;
// i32 @llvm.amdgcn.wave.id()
def int_amdgcn_wave_id :
+ ClangBuiltin<"__builtin_amdgcn_wave_id">,
DefaultAttrsIntrinsic<[llvm_i32_ty], [], [NoUndef<RetIndex>, IntrNoMem, IntrSpeculatable]>;
def int_amdgcn_s_prefetch_data :
>From 223d63923954f109e105b84e1372d5f4fe14c6cd Mon Sep 17 00:00:00 2001
From: Alex Voicu <alexandru.voicu at amd.com>
Date: Tue, 3 Feb 2026 18:48:22 +0000
Subject: [PATCH 4/4] Remove stray newline.
---
clang/test/SemaOpenCL/builtins-amdgcn-error-gfx12.cl | 1 -
1 file changed, 1 deletion(-)
diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx12.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx12.cl
index 09afb7bc12017..f1736fdfc9086 100644
--- a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx12.cl
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx12.cl
@@ -15,4 +15,3 @@ void test(global uint* out, uint a, uint b, uint c) {
*out = __builtin_amdgcn_permlanex16_var(a, b, c, 1, 1); // expected-error {{'__builtin_amdgcn_permlanex16_var' needs target feature gfx12-insts}}
(void)__builtin_amdgcn_wave_id(); // expected-error {{'__builtin_amdgcn_wave_id' needs target feature architected-sgprs}}
}
-
More information about the cfe-commits mailing list