[llvm-branch-commits] [clang] [AMDGPU] Add builtins for wave reduction intrinsics (PR #194814)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Tue May 5 07:37:35 PDT 2026
https://github.com/easyonaadit updated https://github.com/llvm/llvm-project/pull/194814
>From 547da83d8be19e5825f3563b0ecfdb051106bbe6 Mon Sep 17 00:00:00 2001
From: Aaditya <Aaditya.AlokDeshpande at amd.com>
Date: Wed, 29 Apr 2026 10:07:32 +0530
Subject: [PATCH 1/2] [AMDGPU] Add builtins for wave reduction intrinsics
Assisted-by: Claude Sonnet 4.6
---
clang/include/clang/Basic/BuiltinsAMDGPU.td | 9 +
clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp | 18 ++
clang/test/CodeGenOpenCL/builtins-amdgcn.cl | 189 ++++++++++++++++++++
3 files changed, 216 insertions(+)
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.td b/clang/include/clang/Basic/BuiltinsAMDGPU.td
index fc910123560a9..9c58805b353d3 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.td
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.td
@@ -538,6 +538,15 @@ def __builtin_amdgcn_is_invocable : AMDGPUBuiltin<"__amdgpu_feature_predicate_t(
//===----------------------------------------------------------------------===//
+def __builtin_amdgcn_wave_reduce_add_u16 : AMDGPUBuiltin<"unsigned short(unsigned short, _Constant int32_t)", [Const]>;
+def __builtin_amdgcn_wave_reduce_sub_u16 : AMDGPUBuiltin<"unsigned short(unsigned short, _Constant int32_t)", [Const]>;
+def __builtin_amdgcn_wave_reduce_min_i16 : AMDGPUBuiltin<"short(short, _Constant int32_t)", [Const]>;
+def __builtin_amdgcn_wave_reduce_min_u16 : AMDGPUBuiltin<"unsigned short(unsigned short, _Constant int32_t)", [Const]>;
+def __builtin_amdgcn_wave_reduce_max_i16 : AMDGPUBuiltin<"short(short, _Constant int32_t)", [Const]>;
+def __builtin_amdgcn_wave_reduce_max_u16 : AMDGPUBuiltin<"unsigned short(unsigned short, _Constant int32_t)", [Const]>;
+def __builtin_amdgcn_wave_reduce_and_b16 : AMDGPUBuiltin<"short(short, _Constant int32_t)", [Const]>;
+def __builtin_amdgcn_wave_reduce_or_b16 : AMDGPUBuiltin<"short(short, _Constant int32_t)", [Const]>;
+def __builtin_amdgcn_wave_reduce_xor_b16 : AMDGPUBuiltin<"short(short, _Constant int32_t)", [Const]>;
def __builtin_amdgcn_wave_reduce_add_u32 : AMDGPUBuiltin<"uint32_t(uint32_t, _Constant int32_t)", [Const]>;
def __builtin_amdgcn_wave_reduce_sub_u32 : AMDGPUBuiltin<"uint32_t(uint32_t, _Constant int32_t)", [Const]>;
def __builtin_amdgcn_wave_reduce_min_i32 : AMDGPUBuiltin<"int32_t(int32_t, _Constant int32_t)", [Const]>;
diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
index cfad312d7535a..c3f358d6defc2 100644
--- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
@@ -476,42 +476,51 @@ static Intrinsic::ID getIntrinsicIDforWaveReduction(unsigned BuiltinID) {
switch (BuiltinID) {
default:
llvm_unreachable("Unknown BuiltinID for wave reduction");
+ case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u16:
case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u32:
case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u64:
return Intrinsic::amdgcn_wave_reduce_add;
case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_fadd_f32:
case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_fadd_f64:
return Intrinsic::amdgcn_wave_reduce_fadd;
+ case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u16:
case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u32:
case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u64:
return Intrinsic::amdgcn_wave_reduce_sub;
case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_fsub_f32:
case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_fsub_f64:
return Intrinsic::amdgcn_wave_reduce_fsub;
+ case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i16:
case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i32:
case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i64:
return Intrinsic::amdgcn_wave_reduce_min;
case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_fmin_f32:
case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_fmin_f64:
return Intrinsic::amdgcn_wave_reduce_fmin;
+ case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u16:
case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u32:
case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u64:
return Intrinsic::amdgcn_wave_reduce_umin;
+ case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i16:
case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i32:
case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i64:
return Intrinsic::amdgcn_wave_reduce_max;
case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_fmax_f32:
case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_fmax_f64:
return Intrinsic::amdgcn_wave_reduce_fmax;
+ case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u16:
case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u32:
case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u64:
return Intrinsic::amdgcn_wave_reduce_umax;
+ case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b16:
case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b32:
case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b64:
return Intrinsic::amdgcn_wave_reduce_and;
+ case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b16:
case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b32:
case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b64:
return Intrinsic::amdgcn_wave_reduce_or;
+ case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b16:
case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b32:
case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b64:
return Intrinsic::amdgcn_wave_reduce_xor;
@@ -523,22 +532,31 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent;
llvm::SyncScope::ID SSID;
switch (BuiltinID) {
+ case AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u16:
case AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u32:
case AMDGPU::BI__builtin_amdgcn_wave_reduce_fadd_f32:
case AMDGPU::BI__builtin_amdgcn_wave_reduce_fadd_f64:
+ case AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u16:
case AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u32:
case AMDGPU::BI__builtin_amdgcn_wave_reduce_fsub_f32:
case AMDGPU::BI__builtin_amdgcn_wave_reduce_fsub_f64:
+ case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i16:
case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i32:
+ case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u16:
case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u32:
case AMDGPU::BI__builtin_amdgcn_wave_reduce_fmin_f32:
case AMDGPU::BI__builtin_amdgcn_wave_reduce_fmin_f64:
+ case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i16:
case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i32:
+ case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u16:
case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u32:
case AMDGPU::BI__builtin_amdgcn_wave_reduce_fmax_f32:
case AMDGPU::BI__builtin_amdgcn_wave_reduce_fmax_f64:
+ case AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b16:
case AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b32:
+ case AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b16:
case AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b32:
+ case AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b16:
case AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b32:
case AMDGPU::BI__builtin_amdgcn_wave_reduce_add_u64:
case AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_u64:
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
index 2d645a968f2fd..71c0a9da840bf 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl
@@ -944,6 +944,195 @@ void test_wave_reduce_max_u64_dpp(global int* out, long in)
*out = __builtin_amdgcn_wave_reduce_max_u64(in, 2);
}
+// CHECK-LABEL: @test_wave_reduce_add_u16_default
+// CHECK: {{.*}}call{{.*}} i16 @llvm.amdgcn.wave.reduce.add.i16(
+void test_wave_reduce_add_u16_default(global short* out, short in)
+{
+ *out = __builtin_amdgcn_wave_reduce_add_u16(in, 0);
+}
+
+// CHECK-LABEL: @test_wave_reduce_add_u16_iterative
+// CHECK: {{.*}}call{{.*}} i16 @llvm.amdgcn.wave.reduce.add.i16(
+void test_wave_reduce_add_u16_iterative(global short* out, short in)
+{
+ *out = __builtin_amdgcn_wave_reduce_add_u16(in, 1);
+}
+
+// CHECK-LABEL: @test_wave_reduce_add_u16_dpp
+// CHECK: {{.*}}call{{.*}} i16 @llvm.amdgcn.wave.reduce.add.i16(
+void test_wave_reduce_add_u16_dpp(global short* out, short in)
+{
+ *out = __builtin_amdgcn_wave_reduce_add_u16(in, 2);
+}
+
+// CHECK-LABEL: @test_wave_reduce_sub_u16_default
+// CHECK: {{.*}}call{{.*}} i16 @llvm.amdgcn.wave.reduce.sub.i16(
+void test_wave_reduce_sub_u16_default(global short* out, short in)
+{
+ *out = __builtin_amdgcn_wave_reduce_sub_u16(in, 0);
+}
+
+// CHECK-LABEL: @test_wave_reduce_sub_u16_iterative
+// CHECK: {{.*}}call{{.*}} i16 @llvm.amdgcn.wave.reduce.sub.i16(
+void test_wave_reduce_sub_u16_iterative(global short* out, short in)
+{
+ *out = __builtin_amdgcn_wave_reduce_sub_u16(in, 1);
+}
+
+// CHECK-LABEL: @test_wave_reduce_sub_u16_dpp
+// CHECK: {{.*}}call{{.*}} i16 @llvm.amdgcn.wave.reduce.sub.i16(
+void test_wave_reduce_sub_u16_dpp(global short* out, short in)
+{
+ *out = __builtin_amdgcn_wave_reduce_sub_u16(in, 2);
+}
+
+// CHECK-LABEL: @test_wave_reduce_min_i16_default
+// CHECK: {{.*}}call{{.*}} i16 @llvm.amdgcn.wave.reduce.min.i16(
+void test_wave_reduce_min_i16_default(global short* out, short in)
+{
+ *out = __builtin_amdgcn_wave_reduce_min_i16(in, 0);
+}
+
+// CHECK-LABEL: @test_wave_reduce_min_i16_iterative
+// CHECK: {{.*}}call{{.*}} i16 @llvm.amdgcn.wave.reduce.min.i16(
+void test_wave_reduce_min_i16_iterative(global short* out, short in)
+{
+ *out = __builtin_amdgcn_wave_reduce_min_i16(in, 1);
+}
+
+// CHECK-LABEL: @test_wave_reduce_min_i16_dpp
+// CHECK: {{.*}}call{{.*}} i16 @llvm.amdgcn.wave.reduce.min.i16(
+void test_wave_reduce_min_i16_dpp(global short* out, short in)
+{
+ *out = __builtin_amdgcn_wave_reduce_min_i16(in, 2);
+}
+
+// CHECK-LABEL: @test_wave_reduce_min_u16_default
+// CHECK: {{.*}}call{{.*}} i16 @llvm.amdgcn.wave.reduce.umin.i16(
+void test_wave_reduce_min_u16_default(global short* out, short in)
+{
+ *out = __builtin_amdgcn_wave_reduce_min_u16(in, 0);
+}
+
+// CHECK-LABEL: @test_wave_reduce_min_u16_iterative
+// CHECK: {{.*}}call{{.*}} i16 @llvm.amdgcn.wave.reduce.umin.i16(
+void test_wave_reduce_min_u16_iterative(global short* out, short in)
+{
+ *out = __builtin_amdgcn_wave_reduce_min_u16(in, 1);
+}
+
+// CHECK-LABEL: @test_wave_reduce_min_u16_dpp
+// CHECK: {{.*}}call{{.*}} i16 @llvm.amdgcn.wave.reduce.umin.i16(
+void test_wave_reduce_min_u16_dpp(global short* out, short in)
+{
+ *out = __builtin_amdgcn_wave_reduce_min_u16(in, 2);
+}
+
+// CHECK-LABEL: @test_wave_reduce_max_i16_default
+// CHECK: {{.*}}call{{.*}} i16 @llvm.amdgcn.wave.reduce.max.i16(
+void test_wave_reduce_max_i16_default(global short* out, short in)
+{
+ *out = __builtin_amdgcn_wave_reduce_max_i16(in, 0);
+}
+
+// CHECK-LABEL: @test_wave_reduce_max_i16_iterative
+// CHECK: {{.*}}call{{.*}} i16 @llvm.amdgcn.wave.reduce.max.i16(
+void test_wave_reduce_max_i16_iterative(global short* out, short in)
+{
+ *out = __builtin_amdgcn_wave_reduce_max_i16(in, 1);
+}
+
+// CHECK-LABEL: @test_wave_reduce_max_i16_dpp
+// CHECK: {{.*}}call{{.*}} i16 @llvm.amdgcn.wave.reduce.max.i16(
+void test_wave_reduce_max_i16_dpp(global short* out, short in)
+{
+ *out = __builtin_amdgcn_wave_reduce_max_i16(in, 2);
+}
+
+// CHECK-LABEL: @test_wave_reduce_max_u16_default
+// CHECK: {{.*}}call{{.*}} i16 @llvm.amdgcn.wave.reduce.umax.i16(
+void test_wave_reduce_max_u16_default(global short* out, short in)
+{
+ *out = __builtin_amdgcn_wave_reduce_max_u16(in, 0);
+}
+
+// CHECK-LABEL: @test_wave_reduce_max_u16_iterative
+// CHECK: {{.*}}call{{.*}} i16 @llvm.amdgcn.wave.reduce.umax.i16(
+void test_wave_reduce_max_u16_iterative(global short* out, short in)
+{
+ *out = __builtin_amdgcn_wave_reduce_max_u16(in, 1);
+}
+
+// CHECK-LABEL: @test_wave_reduce_max_u16_dpp
+// CHECK: {{.*}}call{{.*}} i16 @llvm.amdgcn.wave.reduce.umax.i16(
+void test_wave_reduce_max_u16_dpp(global short* out, short in)
+{
+ *out = __builtin_amdgcn_wave_reduce_max_u16(in, 2);
+}
+
+// CHECK-LABEL: @test_wave_reduce_and_b16_default
+// CHECK: {{.*}}call{{.*}} i16 @llvm.amdgcn.wave.reduce.and.i16(
+void test_wave_reduce_and_b16_default(global short* out, short in)
+{
+ *out = __builtin_amdgcn_wave_reduce_and_b16(in, 0);
+}
+
+// CHECK-LABEL: @test_wave_reduce_and_b16_iterative
+// CHECK: {{.*}}call{{.*}} i16 @llvm.amdgcn.wave.reduce.and.i16(
+void test_wave_reduce_and_b16_iterative(global short* out, short in)
+{
+ *out = __builtin_amdgcn_wave_reduce_and_b16(in, 1);
+}
+
+// CHECK-LABEL: @test_wave_reduce_and_b16_dpp
+// CHECK: {{.*}}call{{.*}} i16 @llvm.amdgcn.wave.reduce.and.i16(
+void test_wave_reduce_and_b16_dpp(global short* out, short in)
+{
+ *out = __builtin_amdgcn_wave_reduce_and_b16(in, 2);
+}
+
+// CHECK-LABEL: @test_wave_reduce_or_b16_default
+// CHECK: {{.*}}call{{.*}} i16 @llvm.amdgcn.wave.reduce.or.i16(
+void test_wave_reduce_or_b16_default(global short* out, short in)
+{
+ *out = __builtin_amdgcn_wave_reduce_or_b16(in, 0);
+}
+
+// CHECK-LABEL: @test_wave_reduce_or_b16_iterative
+// CHECK: {{.*}}call{{.*}} i16 @llvm.amdgcn.wave.reduce.or.i16(
+void test_wave_reduce_or_b16_iterative(global short* out, short in)
+{
+ *out = __builtin_amdgcn_wave_reduce_or_b16(in, 1);
+}
+
+// CHECK-LABEL: @test_wave_reduce_or_b16_dpp
+// CHECK: {{.*}}call{{.*}} i16 @llvm.amdgcn.wave.reduce.or.i16(
+void test_wave_reduce_or_b16_dpp(global short* out, short in)
+{
+ *out = __builtin_amdgcn_wave_reduce_or_b16(in, 2);
+}
+
+// CHECK-LABEL: @test_wave_reduce_xor_b16_default
+// CHECK: {{.*}}call{{.*}} i16 @llvm.amdgcn.wave.reduce.xor.i16(
+void test_wave_reduce_xor_b16_default(global short* out, short in)
+{
+ *out = __builtin_amdgcn_wave_reduce_xor_b16(in, 0);
+}
+
+// CHECK-LABEL: @test_wave_reduce_xor_b16_iterative
+// CHECK: {{.*}}call{{.*}} i16 @llvm.amdgcn.wave.reduce.xor.i16(
+void test_wave_reduce_xor_b16_iterative(global short* out, short in)
+{
+ *out = __builtin_amdgcn_wave_reduce_xor_b16(in, 1);
+}
+
+// CHECK-LABEL: @test_wave_reduce_xor_b16_dpp
+// CHECK: {{.*}}call{{.*}} i16 @llvm.amdgcn.wave.reduce.xor.i16(
+void test_wave_reduce_xor_b16_dpp(global short* out, short in)
+{
+ *out = __builtin_amdgcn_wave_reduce_xor_b16(in, 2);
+}
+
// CHECK-LABEL: @test_s_barrier
// CHECK: {{.*}}call{{.*}} void @llvm.amdgcn.s.barrier(
void test_s_barrier()
>From 15307092460738a6d24139b7baa97d4a100cccc2 Mon Sep 17 00:00:00 2001
From: Aaditya <Aaditya.AlokDeshpande at amd.com>
Date: Mon, 4 May 2026 14:39:06 +0530
Subject: [PATCH 2/2] Missing SEMA tests
---
.../wave-reduce-builtins-validate-amdgpu.cl | 26 +++++++++++++++++++
1 file changed, 26 insertions(+)
diff --git a/clang/test/Sema/wave-reduce-builtins-validate-amdgpu.cl b/clang/test/Sema/wave-reduce-builtins-validate-amdgpu.cl
index 0f1565f1272c1..373c771c178a3 100644
--- a/clang/test/Sema/wave-reduce-builtins-validate-amdgpu.cl
+++ b/clang/test/Sema/wave-reduce-builtins-validate-amdgpu.cl
@@ -3,6 +3,32 @@
// Test that the second argument (strategy) must be a constant integer
+void test_wave_reduce_u16(unsigned short val, int strategy) {
+ (void)__builtin_amdgcn_wave_reduce_add_u16(val, 0);
+ (void)__builtin_amdgcn_wave_reduce_sub_u16(val, 1);
+ (void)__builtin_amdgcn_wave_reduce_min_u16(val, 0);
+ (void)__builtin_amdgcn_wave_reduce_max_u16(val, 0);
+
+ (void)__builtin_amdgcn_wave_reduce_add_u16(val, strategy); // expected-error {{argument to '__builtin_amdgcn_wave_reduce_add_u16' must be a constant integer}}
+ (void)__builtin_amdgcn_wave_reduce_sub_u16(val, strategy); // expected-error {{argument to '__builtin_amdgcn_wave_reduce_sub_u16' must be a constant integer}}
+ (void)__builtin_amdgcn_wave_reduce_min_u16(val, strategy); // expected-error {{argument to '__builtin_amdgcn_wave_reduce_min_u16' must be a constant integer}}
+ (void)__builtin_amdgcn_wave_reduce_max_u16(val, strategy); // expected-error {{argument to '__builtin_amdgcn_wave_reduce_max_u16' must be a constant integer}}
+}
+
+void test_wave_reduce_i16(short val, int strategy) {
+ (void)__builtin_amdgcn_wave_reduce_min_i16(val, 0);
+ (void)__builtin_amdgcn_wave_reduce_max_i16(val, 0);
+ (void)__builtin_amdgcn_wave_reduce_and_b16(val, 0);
+ (void)__builtin_amdgcn_wave_reduce_or_b16(val, 0);
+ (void)__builtin_amdgcn_wave_reduce_xor_b16(val, 0);
+
+ (void)__builtin_amdgcn_wave_reduce_min_i16(val, strategy); // expected-error {{argument to '__builtin_amdgcn_wave_reduce_min_i16' must be a constant integer}}
+ (void)__builtin_amdgcn_wave_reduce_max_i16(val, strategy); // expected-error {{argument to '__builtin_amdgcn_wave_reduce_max_i16' must be a constant integer}}
+ (void)__builtin_amdgcn_wave_reduce_and_b16(val, strategy); // expected-error {{argument to '__builtin_amdgcn_wave_reduce_and_b16' must be a constant integer}}
+ (void)__builtin_amdgcn_wave_reduce_or_b16(val, strategy); // expected-error {{argument to '__builtin_amdgcn_wave_reduce_or_b16' must be a constant integer}}
+ (void)__builtin_amdgcn_wave_reduce_xor_b16(val, strategy); // expected-error {{argument to '__builtin_amdgcn_wave_reduce_xor_b16' must be a constant integer}}
+}
+
void test_wave_reduce_u32(unsigned int val, int strategy) {
(void)__builtin_amdgcn_wave_reduce_add_u32(val, 0);
(void)__builtin_amdgcn_wave_reduce_sub_u32(val, 1);
More information about the llvm-branch-commits mailing list