[clang] [llvm] [HLSL] Handle WaveActiveBallot struct return type appropriately (PR #175105)
Joshua Batista via cfe-commits
cfe-commits at lists.llvm.org
Tue Jan 13 14:33:10 PST 2026
https://github.com/bob80905 updated https://github.com/llvm/llvm-project/pull/175105
>From c74bf28df75691dfdc3537462a8f1d735b51f865 Mon Sep 17 00:00:00 2001
From: Joshua Batista <jbatista at microsoft.com>
Date: Thu, 8 Jan 2026 17:06:34 -0800
Subject: [PATCH 01/10] handle waveballot struct return type
---
clang/include/clang/Basic/Builtins.td | 2 +-
clang/lib/CodeGen/CGHLSLBuiltins.cpp | 29 +++++++++++++++++--
clang/lib/Sema/SemaHLSL.cpp | 5 ++++
llvm/include/llvm/IR/IntrinsicsDirectX.td | 2 +-
llvm/lib/Target/DirectX/DXIL.td | 8 ++---
llvm/lib/Target/DirectX/DXILOpBuilder.cpp | 17 +++++++++--
llvm/test/CodeGen/DirectX/WaveActiveBallot.ll | 12 ++++----
7 files changed, 58 insertions(+), 17 deletions(-)
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index 0ab50b06e11cf..ccbc0abe3f0b4 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -5058,7 +5058,7 @@ def HLSLWaveActiveAnyTrue : LangBuiltin<"HLSL_LANG"> {
def HLSLWaveActiveBallot : LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_wave_active_ballot"];
let Attributes = [NoThrow, Const];
- let Prototype = "_ExtVector<4, unsigned int>(bool)";
+ let Prototype = "void(bool)";
}
def HLSLWaveActiveCountBits : LangBuiltin<"HLSL_LANG"> {
diff --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
index 1b6c3714f7821..c5a072bfa3974 100644
--- a/clang/lib/CodeGen/CGHLSLBuiltins.cpp
+++ b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
@@ -160,6 +160,31 @@ static Value *handleHlslSplitdouble(const CallExpr *E, CodeGenFunction *CGF) {
return LastInst;
}
+static Value *handleHlslWaveActiveBallot(const CallExpr *E,
+ CodeGenFunction *CGF) {
+ Value *Cond = CGF->EmitScalarExpr(E->getArg(0));
+ llvm::Type *I32 = CGF->Int32Ty;
+ llvm::StructType *RetTy = llvm::StructType::get(I32, I32, I32, I32);
+
+ if (CGF->CGM.getTarget().getTriple().isDXIL()) {
+ // dx.op.waveActiveBallot(opcode, i1)
+ return CGF->Builder.CreateIntrinsic(RetTy, Intrinsic::dx_wave_ballot,
+ {Cond}, nullptr, "wave.active.ballot");
+ }
+
+ if (CGF->CGM.getTarget().getTriple().isSPIRV()) {
+ // spv.wave.ballot(i1) -> <4 x i32>, then bitcast to struct
+ llvm::Type *VecTy = llvm::FixedVectorType::get(I32, 4);
+ return CGF->Builder.CreateIntrinsic(VecTy, Intrinsic::spv_wave_ballot,
+ {Cond}, nullptr, "spv.wave.ballot");
+ }
+
+ CGF->CGM.Error(E->getExprLoc(),
+ "waveActiveBallot is not supported for this target");
+
+ return llvm::UndefValue::get(RetTy);
+}
+
static Value *handleElementwiseF16ToF32(CodeGenFunction &CGF,
const CallExpr *E) {
Value *Op0 = CGF.EmitScalarExpr(E->getArg(0));
@@ -834,9 +859,7 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
assert(Op->getType()->isIntegerTy(1) &&
"Intrinsic WaveActiveBallot operand must be a bool");
- Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveBallotIntrinsic();
- return EmitRuntimeCall(
- Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID), {Op});
+ return handleHlslWaveActiveBallot(E, this);
}
case Builtin::BI__builtin_hlsl_wave_active_count_bits: {
Value *OpExpr = EmitScalarExpr(E->getArg(0));
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index a6de1cd550212..51f74c10677a9 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -3507,6 +3507,11 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
return true;
break;
}
+ case Builtin::BI__builtin_hlsl_wave_active_ballot: {
+ if (SemaRef.checkArgCount(TheCall, 1))
+ return true;
+ break;
+ }
case Builtin::BI__builtin_hlsl_elementwise_splitdouble: {
if (SemaRef.checkArgCount(TheCall, 3))
return true;
diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td
index 6e6eb2d0ece9d..f79945785566c 100644
--- a/llvm/include/llvm/IR/IntrinsicsDirectX.td
+++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -153,7 +153,7 @@ def int_dx_rsqrt : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]
def int_dx_wave_active_countbits : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i1_ty], [IntrConvergent, IntrNoMem]>;
def int_dx_wave_all : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_i1_ty], [IntrConvergent, IntrNoMem]>;
def int_dx_wave_any : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_i1_ty], [IntrConvergent, IntrNoMem]>;
-def int_dx_wave_ballot : DefaultAttrsIntrinsic<[llvm_v4i32_ty], [llvm_i1_ty], [IntrConvergent, IntrNoMem]>;
+def int_dx_wave_ballot : DefaultAttrsIntrinsic<[llvm_anyint_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [llvm_i1_ty], [IntrConvergent, IntrNoMem]>;
def int_dx_wave_getlaneindex : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrConvergent, IntrNoMem]>;
def int_dx_wave_reduce_max : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem]>;
def int_dx_wave_reduce_umax : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem]>;
diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index 6d04732d92ecf..23701e2218e57 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -58,6 +58,7 @@ def ResPropsTy : DXILOpParamType;
def SplitDoubleTy : DXILOpParamType;
def BinaryWithCarryTy : DXILOpParamType;
def DimensionsTy : DXILOpParamType;
+def Fouri32s : DXILOpParamType;
class DXILOpClass;
@@ -212,13 +213,12 @@ defset list<DXILOpClass> OpClasses = {
def unpack4x8 : DXILOpClass;
def viewID : DXILOpClass;
def waveActiveAllEqual : DXILOpClass;
- def waveActiveBallot : DXILOpClass;
def waveActiveBit : DXILOpClass;
def waveActiveOp : DXILOpClass;
def waveAllOp : DXILOpClass;
def waveAllTrue : DXILOpClass;
def waveAnyTrue : DXILOpClass;
- def waveBallot : DXILOpClass;
+ def waveActiveBallot : DXILOpClass;
def waveGetLaneCount : DXILOpClass;
def waveGetLaneIndex : DXILOpClass;
def waveIsFirstLane : DXILOpClass;
@@ -1072,11 +1072,11 @@ def WaveReadLaneAt : DXILOp<117, waveReadLaneAt> {
let stages = [Stages<DXIL1_0, [all_stages]>];
}
-def WaveActiveBallot : DXILOp<118, waveBallot> {
+def WaveActiveBallot : DXILOp<116, waveActiveBallot> {
let Doc = "returns uint4 containing a bitmask of the evaluation of the boolean expression for all active lanes in the current wave.";
let intrinsics = [IntrinSelect<int_dx_wave_ballot>];
let arguments = [Int1Ty];
- let result = OverloadTy;
+ let result = Fouri32s;
let stages = [Stages<DXIL1_0, [all_stages]>];
}
diff --git a/llvm/lib/Target/DirectX/DXILOpBuilder.cpp b/llvm/lib/Target/DirectX/DXILOpBuilder.cpp
index 944b2e6433988..1f41d2457e5bc 100644
--- a/llvm/lib/Target/DirectX/DXILOpBuilder.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpBuilder.cpp
@@ -261,10 +261,18 @@ static StructType *getBinaryWithCarryType(LLVMContext &Context) {
return StructType::create({Int32Ty, Int1Ty}, "dx.types.i32c");
}
-static StructType *getDimensionsType(LLVMContext &Ctx) {
- Type *Int32Ty = Type::getInt32Ty(Ctx);
+static StructType *getDimensionsType(LLVMContext &Context) {
+ Type *Int32Ty = Type::getInt32Ty(Context);
return getOrCreateStructType("dx.types.Dimensions",
- {Int32Ty, Int32Ty, Int32Ty, Int32Ty}, Ctx);
+ {Int32Ty, Int32Ty, Int32Ty, Int32Ty}, Context);
+}
+
+static StructType *getFouri32sType(LLVMContext &Context) {
+ if (auto *ST = StructType::getTypeByName(Context, "dx.types.fouri32"))
+ return ST;
+ Type *Int32Ty = Type::getInt32Ty(Context);
+ return getOrCreateStructType("dx.types.fouri32",
+ {Int32Ty, Int32Ty, Int32Ty, Int32Ty}, Context);
}
static Type *getTypeFromOpParamType(OpParamType Kind, LLVMContext &Ctx,
@@ -326,7 +334,10 @@ static Type *getTypeFromOpParamType(OpParamType Kind, LLVMContext &Ctx,
return getBinaryWithCarryType(Ctx);
case OpParamType::DimensionsTy:
return getDimensionsType(Ctx);
+ case OpParamType::Fouri32s:
+ return getFouri32sType(Ctx);
}
+
llvm_unreachable("Invalid parameter kind");
return nullptr;
}
diff --git a/llvm/test/CodeGen/DirectX/WaveActiveBallot.ll b/llvm/test/CodeGen/DirectX/WaveActiveBallot.ll
index cf6255de3a734..31a64cbcf061e 100644
--- a/llvm/test/CodeGen/DirectX/WaveActiveBallot.ll
+++ b/llvm/test/CodeGen/DirectX/WaveActiveBallot.ll
@@ -1,10 +1,12 @@
; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-compute %s | FileCheck %s
-define noundef <4 x i32> @wave_ballot_simple(i1 noundef %p1) {
+%dx.types.fouri32 = type { i32, i32, i32, i32 }
+
+define noundef %dx.types.fouri32 @wave_ballot_simple(i1 noundef %p1) {
entry:
-; CHECK: call <4 x i32> @dx.op.waveBallot.void(i32 118, i1 %p1)
- %ret = call <4 x i32> @llvm.dx.wave.ballot(i1 %p1)
- ret <4 x i32> %ret
+; CHECK: call %dx.types.fouri32 @dx.op.waveActiveBallot(i32 116, i1 %p1)
+ %ret = call %dx.types.fouri32 @llvm.dx.wave.ballot(i1 %p1)
+ ret %dx.types.fouri32 %ret
}
-declare <4 x i32> @llvm.dx.wave.ballot(i1)
+declare %dx.types.fouri32 @llvm.dx.wave.ballot(i1)
>From b54b8d5a525cb9817603488ba7c8a2220bc6baab Mon Sep 17 00:00:00 2001
From: Joshua Batista <jbatista at microsoft.com>
Date: Thu, 8 Jan 2026 19:53:00 -0800
Subject: [PATCH 02/10] update codegen to use emitruntimecall to force use of
convergence token
---
clang/lib/CodeGen/CGHLSLBuiltins.cpp | 16 +++++++---------
.../CodeGenHLSL/builtins/WaveActiveBallot.hlsl | 11 ++++++++---
2 files changed, 15 insertions(+), 12 deletions(-)
diff --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
index c5a072bfa3974..1e3f5611e69d1 100644
--- a/clang/lib/CodeGen/CGHLSLBuiltins.cpp
+++ b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
@@ -164,25 +164,23 @@ static Value *handleHlslWaveActiveBallot(const CallExpr *E,
CodeGenFunction *CGF) {
Value *Cond = CGF->EmitScalarExpr(E->getArg(0));
llvm::Type *I32 = CGF->Int32Ty;
- llvm::StructType *RetTy = llvm::StructType::get(I32, I32, I32, I32);
if (CGF->CGM.getTarget().getTriple().isDXIL()) {
- // dx.op.waveActiveBallot(opcode, i1)
- return CGF->Builder.CreateIntrinsic(RetTy, Intrinsic::dx_wave_ballot,
- {Cond}, nullptr, "wave.active.ballot");
+ return CGF->EmitRuntimeCall(
+ CGF->CGM.getIntrinsic(Intrinsic::dx_wave_ballot, {I32}), Cond);
}
if (CGF->CGM.getTarget().getTriple().isSPIRV()) {
- // spv.wave.ballot(i1) -> <4 x i32>, then bitcast to struct
llvm::Type *VecTy = llvm::FixedVectorType::get(I32, 4);
- return CGF->Builder.CreateIntrinsic(VecTy, Intrinsic::spv_wave_ballot,
- {Cond}, nullptr, "spv.wave.ballot");
+
+ return CGF->EmitRuntimeCall(
+ CGF->CGM.getIntrinsic(Intrinsic::spv_wave_ballot), Cond);
}
CGF->CGM.Error(E->getExprLoc(),
- "waveActiveBallot is not supported for this target");
+ "WaveActiveBallot is not supported for this target");
- return llvm::UndefValue::get(RetTy);
+ return llvm::PoisonValue::get(llvm::FixedVectorType::get(I32, 4));
}
static Value *handleElementwiseF16ToF32(CodeGenFunction &CGF,
diff --git a/clang/test/CodeGenHLSL/builtins/WaveActiveBallot.hlsl b/clang/test/CodeGenHLSL/builtins/WaveActiveBallot.hlsl
index 61b077eb1fead..ceee9eb015512 100644
--- a/clang/test/CodeGenHLSL/builtins/WaveActiveBallot.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/WaveActiveBallot.hlsl
@@ -10,8 +10,13 @@
// CHECK-LABEL: define {{.*}}test
uint4 test(bool p1) {
// CHECK-SPIRV: %[[#entry_tok0:]] = call token @llvm.experimental.convergence.entry()
- // CHECK-SPIRV: %[[RET:.*]] = call spir_func <4 x i32> @llvm.spv.wave.ballot(i1 %{{[a-zA-Z0-9]+}}) [ "convergencectrl"(token %[[#entry_tok0]]) ]
- // CHECK-DXIL: %[[RET:.*]] = call <4 x i32> @llvm.dx.wave.ballot(i1 %{{[a-zA-Z0-9]+}})
- // CHECK: ret <4 x i32> %[[RET]]
+ // CHECK-SPIRV: %[[RET:.*]] = call spir_func <4 x i32> @llvm.spv.wave.ballot(i1 %{{[a-zA-Z0-9]+}}) [ "convergencectrl"(token %[[#entry_tok0]]) ]
+ // CHECK-DXIL: %[[RETVAL:.*]] = alloca <4 x i32>, align 16
+ // CHECK-DXIL: %[[WAB:.*]] = call { i32, i32, i32, i32 } @llvm.dx.wave.ballot.i32(i1 %{{[a-zA-Z0-9]+}})
+ // CHECK-DXIL: store { i32, i32, i32, i32 } %[[WAB]], ptr %[[RETVAL]], align 16
+ // CHECK-DXIL: %[[LOAD:.*]] = load <4 x i32>, ptr %[[RETVAL]], align 16
+ // CHECK-DXIL: ret <4 x i32> %[[LOAD]]
+ // CHECK-SPIRV: ret <4 x i32> %[[RET]]
+
return WaveActiveBallot(p1);
}
>From 4b7b4a552c6d83e385c16eee0275878e73dc36c1 Mon Sep 17 00:00:00 2001
From: Joshua Batista <jbatista at microsoft.com>
Date: Fri, 9 Jan 2026 00:25:27 -0800
Subject: [PATCH 03/10] remove unused var
---
clang/lib/CodeGen/CGHLSLBuiltins.cpp | 2 --
1 file changed, 2 deletions(-)
diff --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
index 1e3f5611e69d1..13c6dadf86ca7 100644
--- a/clang/lib/CodeGen/CGHLSLBuiltins.cpp
+++ b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
@@ -171,8 +171,6 @@ static Value *handleHlslWaveActiveBallot(const CallExpr *E,
}
if (CGF->CGM.getTarget().getTriple().isSPIRV()) {
- llvm::Type *VecTy = llvm::FixedVectorType::get(I32, 4);
-
return CGF->EmitRuntimeCall(
CGF->CGM.getIntrinsic(Intrinsic::spv_wave_ballot), Cond);
}
>From 1cc1eaced861ac697b6836b2b98a42c97fbc3147 Mon Sep 17 00:00:00 2001
From: Joshua Batista <jbatista at microsoft.com>
Date: Fri, 9 Jan 2026 00:27:10 -0800
Subject: [PATCH 04/10] clangformat
---
clang/lib/CodeGen/CGHLSLBuiltins.cpp | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
index 13c6dadf86ca7..f5ca49749dc6d 100644
--- a/clang/lib/CodeGen/CGHLSLBuiltins.cpp
+++ b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
@@ -170,10 +170,9 @@ static Value *handleHlslWaveActiveBallot(const CallExpr *E,
CGF->CGM.getIntrinsic(Intrinsic::dx_wave_ballot, {I32}), Cond);
}
- if (CGF->CGM.getTarget().getTriple().isSPIRV()) {
+ if (CGF->CGM.getTarget().getTriple().isSPIRV())
return CGF->EmitRuntimeCall(
CGF->CGM.getIntrinsic(Intrinsic::spv_wave_ballot), Cond);
- }
CGF->CGM.Error(E->getExprLoc(),
"WaveActiveBallot is not supported for this target");
>From 7b72cf11dd6321f3186c312acd97886ebdbc01ad Mon Sep 17 00:00:00 2001
From: Joshua Batista <jbatista at microsoft.com>
Date: Fri, 9 Jan 2026 14:00:08 -0800
Subject: [PATCH 05/10] address Finn
---
clang/include/clang/Basic/Builtins.td | 2 +-
clang/lib/CodeGen/CGHLSLBuiltins.cpp | 6 ++----
2 files changed, 3 insertions(+), 5 deletions(-)
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index ccbc0abe3f0b4..0ab50b06e11cf 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -5058,7 +5058,7 @@ def HLSLWaveActiveAnyTrue : LangBuiltin<"HLSL_LANG"> {
def HLSLWaveActiveBallot : LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_wave_active_ballot"];
let Attributes = [NoThrow, Const];
- let Prototype = "void(bool)";
+ let Prototype = "_ExtVector<4, unsigned int>(bool)";
}
def HLSLWaveActiveCountBits : LangBuiltin<"HLSL_LANG"> {
diff --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
index f5ca49749dc6d..d75b5c0c6b17f 100644
--- a/clang/lib/CodeGen/CGHLSLBuiltins.cpp
+++ b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
@@ -174,10 +174,8 @@ static Value *handleHlslWaveActiveBallot(const CallExpr *E,
return CGF->EmitRuntimeCall(
CGF->CGM.getIntrinsic(Intrinsic::spv_wave_ballot), Cond);
- CGF->CGM.Error(E->getExprLoc(),
- "WaveActiveBallot is not supported for this target");
-
- return llvm::PoisonValue::get(llvm::FixedVectorType::get(I32, 4));
+ llvm_unreachable(
+ "WaveActiveBallot is only supported for DXIL and SPIRV targets");
}
static Value *handleElementwiseF16ToF32(CodeGenFunction &CGF,
>From b64387b47995d072dc0574f8736a45f15a55d23d Mon Sep 17 00:00:00 2001
From: Joshua Batista <jbatista at microsoft.com>
Date: Mon, 12 Jan 2026 15:47:54 -0800
Subject: [PATCH 06/10] address Farzon
---
clang/lib/CodeGen/CGHLSLBuiltins.cpp | 40 ++++++++++++++-----
clang/lib/CodeGen/CGHLSLRuntime.h | 1 -
clang/lib/Sema/SemaHLSL.cpp | 5 ---
llvm/include/llvm/IR/IntrinsicsDirectX.td | 2 +-
llvm/include/llvm/IR/IntrinsicsSPIRV.td | 2 +-
.../Target/SPIRV/SPIRVInstructionSelector.cpp | 2 +-
llvm/test/CodeGen/DirectX/WaveActiveBallot.ll | 16 ++++++--
llvm/test/tools/dxil-dis/waveactiveballot.ll | 31 ++++++++++++++
8 files changed, 76 insertions(+), 23 deletions(-)
create mode 100644 llvm/test/tools/dxil-dis/waveactiveballot.ll
diff --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
index d75b5c0c6b17f..8e491ee318bb9 100644
--- a/clang/lib/CodeGen/CGHLSLBuiltins.cpp
+++ b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
@@ -160,19 +160,37 @@ static Value *handleHlslSplitdouble(const CallExpr *E, CodeGenFunction *CGF) {
return LastInst;
}
-static Value *handleHlslWaveActiveBallot(const CallExpr *E,
- CodeGenFunction *CGF) {
- Value *Cond = CGF->EmitScalarExpr(E->getArg(0));
- llvm::Type *I32 = CGF->Int32Ty;
+static Value *handleHlslWaveActiveBallot(CodeGenFunction &CGF,
+ const CallExpr *E) {
+ Value *Cond = CGF.EmitScalarExpr(E->getArg(0));
+ llvm::Type *I32 = CGF.Int32Ty;
+
+ llvm::Type *Vec4I32 = llvm::FixedVectorType::get(I32, 4);
+ llvm::StructType *Struct4I32 =
+ llvm::StructType::get(CGF.getLLVMContext(), {I32, I32, I32, I32});
+
+ if (CGF.CGM.getTarget().getTriple().isDXIL()) {
+ // Call DXIL intrinsic: returns { i32, i32, i32, i32 }
+ llvm::Function *Fn = CGF.CGM.getIntrinsic(Intrinsic::dx_wave_ballot, {I32});
+
+ Value *StructVal = CGF.EmitRuntimeCall(Fn, Cond);
+ assert(StructVal->getType() == Struct4I32 &&
+ "dx.wave.ballot must return {i32,i32,i32,i32}");
+
+ // Reassemble struct to <4 x i32>
+ llvm::Value *VecVal = llvm::PoisonValue::get(Vec4I32);
+ for (unsigned i = 0; i < 4; ++i) {
+ Value *Elt = CGF.Builder.CreateExtractValue(StructVal, i);
+ VecVal =
+ CGF.Builder.CreateInsertElement(VecVal, Elt, CGF.Builder.getInt32(i));
+ }
- if (CGF->CGM.getTarget().getTriple().isDXIL()) {
- return CGF->EmitRuntimeCall(
- CGF->CGM.getIntrinsic(Intrinsic::dx_wave_ballot, {I32}), Cond);
+ return VecVal;
}
- if (CGF->CGM.getTarget().getTriple().isSPIRV())
- return CGF->EmitRuntimeCall(
- CGF->CGM.getIntrinsic(Intrinsic::spv_wave_ballot), Cond);
+ if (CGF.CGM.getTarget().getTriple().isSPIRV())
+ return CGF.EmitRuntimeCall(
+ CGF.CGM.getIntrinsic(Intrinsic::spv_subgroup_ballot), Cond);
llvm_unreachable(
"WaveActiveBallot is only supported for DXIL and SPIRV targets");
@@ -852,7 +870,7 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
assert(Op->getType()->isIntegerTy(1) &&
"Intrinsic WaveActiveBallot operand must be a bool");
- return handleHlslWaveActiveBallot(E, this);
+ return handleHlslWaveActiveBallot(*this, E);
}
case Builtin::BI__builtin_hlsl_wave_active_count_bits: {
Value *OpExpr = EmitScalarExpr(E->getArg(0));
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h
index 7a5643052ed84..ba2ca2c358388 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.h
+++ b/clang/lib/CodeGen/CGHLSLRuntime.h
@@ -146,7 +146,6 @@ class CGHLSLRuntime {
GENERATE_HLSL_INTRINSIC_FUNCTION(Dot4AddU8Packed, dot4add_u8packed)
GENERATE_HLSL_INTRINSIC_FUNCTION(WaveActiveAllTrue, wave_all)
GENERATE_HLSL_INTRINSIC_FUNCTION(WaveActiveAnyTrue, wave_any)
- GENERATE_HLSL_INTRINSIC_FUNCTION(WaveActiveBallot, wave_ballot)
GENERATE_HLSL_INTRINSIC_FUNCTION(WaveActiveCountBits, wave_active_countbits)
GENERATE_HLSL_INTRINSIC_FUNCTION(WaveIsFirstLane, wave_is_first_lane)
GENERATE_HLSL_INTRINSIC_FUNCTION(WaveGetLaneCount, wave_get_lane_count)
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index 51f74c10677a9..a6de1cd550212 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -3507,11 +3507,6 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
return true;
break;
}
- case Builtin::BI__builtin_hlsl_wave_active_ballot: {
- if (SemaRef.checkArgCount(TheCall, 1))
- return true;
- break;
- }
case Builtin::BI__builtin_hlsl_elementwise_splitdouble: {
if (SemaRef.checkArgCount(TheCall, 3))
return true;
diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td
index f79945785566c..3c2c7477d8c7b 100644
--- a/llvm/include/llvm/IR/IntrinsicsDirectX.td
+++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -153,7 +153,7 @@ def int_dx_rsqrt : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]
def int_dx_wave_active_countbits : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i1_ty], [IntrConvergent, IntrNoMem]>;
def int_dx_wave_all : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_i1_ty], [IntrConvergent, IntrNoMem]>;
def int_dx_wave_any : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_i1_ty], [IntrConvergent, IntrNoMem]>;
-def int_dx_wave_ballot : DefaultAttrsIntrinsic<[llvm_anyint_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [llvm_i1_ty], [IntrConvergent, IntrNoMem]>;
+def int_dx_wave_ballot : DefaultAttrsIntrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [llvm_i1_ty], [IntrConvergent, IntrNoMem]>;
def int_dx_wave_getlaneindex : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrConvergent, IntrNoMem]>;
def int_dx_wave_reduce_max : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem]>;
def int_dx_wave_reduce_umax : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem]>;
diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
index bcb533780b58c..da4031fadd8e9 100644
--- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td
+++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
@@ -120,7 +120,7 @@ def int_spv_rsqrt : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty]
def int_spv_wave_active_countbits : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i1_ty], [IntrConvergent, IntrNoMem]>;
def int_spv_wave_all : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_i1_ty], [IntrConvergent, IntrNoMem]>;
def int_spv_wave_any : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_i1_ty], [IntrConvergent, IntrNoMem]>;
- def int_spv_wave_ballot : ClangBuiltin<"__builtin_spirv_subgroup_ballot">,
+ def int_spv_subgroup_ballot : ClangBuiltin<"__builtin_spirv_subgroup_ballot">,
DefaultAttrsIntrinsic<[llvm_v4i32_ty], [llvm_i1_ty], [IntrConvergent, IntrNoMem]>;
def int_spv_wave_reduce_umax : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem]>;
def int_spv_wave_reduce_max : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem]>;
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index 100057f6d1a39..fcce327fa8b76 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -3815,7 +3815,7 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg,
return selectWaveOpInst(ResVReg, ResType, I, SPIRV::OpGroupNonUniformAll);
case Intrinsic::spv_wave_any:
return selectWaveOpInst(ResVReg, ResType, I, SPIRV::OpGroupNonUniformAny);
- case Intrinsic::spv_wave_ballot:
+ case Intrinsic::spv_subgroup_ballot:
return selectWaveOpInst(ResVReg, ResType, I,
SPIRV::OpGroupNonUniformBallot);
case Intrinsic::spv_wave_is_first_lane:
diff --git a/llvm/test/CodeGen/DirectX/WaveActiveBallot.ll b/llvm/test/CodeGen/DirectX/WaveActiveBallot.ll
index 31a64cbcf061e..a7186427c6d09 100644
--- a/llvm/test/CodeGen/DirectX/WaveActiveBallot.ll
+++ b/llvm/test/CodeGen/DirectX/WaveActiveBallot.ll
@@ -2,11 +2,21 @@
%dx.types.fouri32 = type { i32, i32, i32, i32 }
-define noundef %dx.types.fouri32 @wave_ballot_simple(i1 noundef %p1) {
+define <4 x i32> @wave_ballot_simple(i1 noundef %p1) {
entry:
; CHECK: call %dx.types.fouri32 @dx.op.waveActiveBallot(i32 116, i1 %p1)
- %ret = call %dx.types.fouri32 @llvm.dx.wave.ballot(i1 %p1)
- ret %dx.types.fouri32 %ret
+; CHECK-NOT: ret %dx.types.fouri32
+; CHECK: ret <4 x i32>
+ %s = call %dx.types.fouri32 @llvm.dx.wave.ballot(i1 %p1)
+ %v0 = extractvalue %dx.types.fouri32 %s, 0
+ %v1 = extractvalue %dx.types.fouri32 %s, 1
+ %v2 = extractvalue %dx.types.fouri32 %s, 2
+ %v3 = extractvalue %dx.types.fouri32 %s, 3
+ %vec = insertelement <4 x i32> poison, i32 %v0, i32 0
+ %vec1 = insertelement <4 x i32> %vec, i32 %v1, i32 1
+ %vec2 = insertelement <4 x i32> %vec1, i32 %v2, i32 2
+ %vec3 = insertelement <4 x i32> %vec2, i32 %v3, i32 3
+ ret <4 x i32> %vec3
}
declare %dx.types.fouri32 @llvm.dx.wave.ballot(i1)
diff --git a/llvm/test/tools/dxil-dis/waveactiveballot.ll b/llvm/test/tools/dxil-dis/waveactiveballot.ll
new file mode 100644
index 0000000000000..2bdb4ec98a3db
--- /dev/null
+++ b/llvm/test/tools/dxil-dis/waveactiveballot.ll
@@ -0,0 +1,31 @@
+; RUN: llc %s --filetype=obj -o - | dxil-dis -o - | FileCheck %s
+
+; CHECK-NOT: llvm.dx.wave.ballot
+
+; CHECK: call %dx.types.fouri32 @dx.op.waveActiveBallot(i32 116, i1 %p1)
+; CHECK-NOT: ret %dx.types.fouri32
+; CHECK: ret <4 x i32>
+
+
+target triple = "dxil-unknown-shadermodel6.3-library"
+
+%dx.types.fouri32 = type { i32, i32, i32, i32 }
+
+define <4 x i32> @wave_ballot_simple(i1 %p1) {
+entry:
+ %s = call %dx.types.fouri32 @llvm.dx.wave.ballot(i1 %p1)
+
+ %v0 = extractvalue %dx.types.fouri32 %s, 0
+ %v1 = extractvalue %dx.types.fouri32 %s, 1
+ %v2 = extractvalue %dx.types.fouri32 %s, 2
+ %v3 = extractvalue %dx.types.fouri32 %s, 3
+
+ %vec0 = insertelement <4 x i32> poison, i32 %v0, i32 0
+ %vec1 = insertelement <4 x i32> %vec0, i32 %v1, i32 1
+ %vec2 = insertelement <4 x i32> %vec1, i32 %v2, i32 2
+ %vec3 = insertelement <4 x i32> %vec2, i32 %v3, i32 3
+
+ ret <4 x i32> %vec3
+}
+
+declare %dx.types.fouri32 @llvm.dx.wave.ballot(i1)
>From 85a428f3de6fd14e85fd858388026777357e16c8 Mon Sep 17 00:00:00 2001
From: Joshua Batista <jbatista at microsoft.com>
Date: Mon, 12 Jan 2026 17:02:12 -0800
Subject: [PATCH 07/10] address Farzon
---
clang/lib/CodeGen/CGHLSLBuiltins.cpp | 6 +++---
.../CodeGenHLSL/builtins/WaveActiveBallot.hlsl | 17 +++++++++++------
llvm/include/llvm/IR/IntrinsicsDirectX.td | 2 +-
3 files changed, 15 insertions(+), 10 deletions(-)
diff --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
index 8e491ee318bb9..75995ff940bc4 100644
--- a/clang/lib/CodeGen/CGHLSLBuiltins.cpp
+++ b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
@@ -179,10 +179,10 @@ static Value *handleHlslWaveActiveBallot(CodeGenFunction &CGF,
// Reassemble struct to <4 x i32>
llvm::Value *VecVal = llvm::PoisonValue::get(Vec4I32);
- for (unsigned i = 0; i < 4; ++i) {
- Value *Elt = CGF.Builder.CreateExtractValue(StructVal, i);
+ for (unsigned I = 0; I < 4; ++I) {
+ Value *Elt = CGF.Builder.CreateExtractValue(StructVal, I);
VecVal =
- CGF.Builder.CreateInsertElement(VecVal, Elt, CGF.Builder.getInt32(i));
+ CGF.Builder.CreateInsertElement(VecVal, Elt, CGF.Builder.getInt32(I));
}
return VecVal;
diff --git a/clang/test/CodeGenHLSL/builtins/WaveActiveBallot.hlsl b/clang/test/CodeGenHLSL/builtins/WaveActiveBallot.hlsl
index ceee9eb015512..3ae271c4ecd75 100644
--- a/clang/test/CodeGenHLSL/builtins/WaveActiveBallot.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/WaveActiveBallot.hlsl
@@ -10,13 +10,18 @@
// CHECK-LABEL: define {{.*}}test
uint4 test(bool p1) {
// CHECK-SPIRV: %[[#entry_tok0:]] = call token @llvm.experimental.convergence.entry()
- // CHECK-SPIRV: %[[RET:.*]] = call spir_func <4 x i32> @llvm.spv.wave.ballot(i1 %{{[a-zA-Z0-9]+}}) [ "convergencectrl"(token %[[#entry_tok0]]) ]
- // CHECK-DXIL: %[[RETVAL:.*]] = alloca <4 x i32>, align 16
+ // CHECK-SPIRV: %[[SPIRVRET:.*]] = call spir_func <4 x i32> @llvm.spv.subgroup.ballot(i1 %{{[a-zA-Z0-9]+}}) [ "convergencectrl"(token %[[#entry_tok0]]) ]
// CHECK-DXIL: %[[WAB:.*]] = call { i32, i32, i32, i32 } @llvm.dx.wave.ballot.i32(i1 %{{[a-zA-Z0-9]+}})
- // CHECK-DXIL: store { i32, i32, i32, i32 } %[[WAB]], ptr %[[RETVAL]], align 16
- // CHECK-DXIL: %[[LOAD:.*]] = load <4 x i32>, ptr %[[RETVAL]], align 16
- // CHECK-DXIL: ret <4 x i32> %[[LOAD]]
- // CHECK-SPIRV: ret <4 x i32> %[[RET]]
+ // CHECK-DXIL: extractvalue { i32, i32, i32, i32 } {{.*}} 0
+ // CHECK-DXIL: insertelement <4 x i32> poison, i32 {{.*}}, i32 0
+ // CHECK-DXIL: extractvalue { i32, i32, i32, i32 } {{.*}} 1
+ // CHECK-DXIL: insertelement <4 x i32> {{.*}}, i32 {{.*}}, i32 1
+ // CHECK-DXIL: extractvalue { i32, i32, i32, i32 } {{.*}} 2
+ // CHECK-DXIL: insertelement <4 x i32> {{.*}}, i32 {{.*}}, i32 2
+ // CHECK-DXIL: extractvalue { i32, i32, i32, i32 } {{.*}} 3
+ // CHECK-DXIL: %[[DXILRET:.*]] = insertelement <4 x i32> {{.*}}, i32 {{.*}}, i32 3
+ // CHECK-DXIL: ret <4 x i32> %[[DXILRET]]
+ // CHECK-SPIRV: ret <4 x i32> %[[SPIRVRET]]
return WaveActiveBallot(p1);
}
diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td
index 3c2c7477d8c7b..f79945785566c 100644
--- a/llvm/include/llvm/IR/IntrinsicsDirectX.td
+++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -153,7 +153,7 @@ def int_dx_rsqrt : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]
def int_dx_wave_active_countbits : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i1_ty], [IntrConvergent, IntrNoMem]>;
def int_dx_wave_all : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_i1_ty], [IntrConvergent, IntrNoMem]>;
def int_dx_wave_any : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_i1_ty], [IntrConvergent, IntrNoMem]>;
-def int_dx_wave_ballot : DefaultAttrsIntrinsic<[llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [llvm_i1_ty], [IntrConvergent, IntrNoMem]>;
+def int_dx_wave_ballot : DefaultAttrsIntrinsic<[llvm_anyint_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [llvm_i1_ty], [IntrConvergent, IntrNoMem]>;
def int_dx_wave_getlaneindex : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrConvergent, IntrNoMem]>;
def int_dx_wave_reduce_max : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem]>;
def int_dx_wave_reduce_umax : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem]>;
>From d4d6976369ef9adda3351551f98a5a7fca7d584a Mon Sep 17 00:00:00 2001
From: Joshua Batista <jbatista at microsoft.com>
Date: Tue, 13 Jan 2026 11:52:09 -0800
Subject: [PATCH 08/10] fix tests
---
.../CodeGenHLSL/builtins/WaveActiveBallot.hlsl | 18 +++++++++---------
.../SPIRV/hlsl-intrinsics/WaveActiveBallot.ll | 4 ++--
2 files changed, 11 insertions(+), 11 deletions(-)
diff --git a/clang/test/CodeGenHLSL/builtins/WaveActiveBallot.hlsl b/clang/test/CodeGenHLSL/builtins/WaveActiveBallot.hlsl
index 3ae271c4ecd75..df2d854a64247 100644
--- a/clang/test/CodeGenHLSL/builtins/WaveActiveBallot.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/WaveActiveBallot.hlsl
@@ -12,15 +12,15 @@ uint4 test(bool p1) {
// CHECK-SPIRV: %[[#entry_tok0:]] = call token @llvm.experimental.convergence.entry()
// CHECK-SPIRV: %[[SPIRVRET:.*]] = call spir_func <4 x i32> @llvm.spv.subgroup.ballot(i1 %{{[a-zA-Z0-9]+}}) [ "convergencectrl"(token %[[#entry_tok0]]) ]
// CHECK-DXIL: %[[WAB:.*]] = call { i32, i32, i32, i32 } @llvm.dx.wave.ballot.i32(i1 %{{[a-zA-Z0-9]+}})
- // CHECK-DXIL: extractvalue { i32, i32, i32, i32 } {{.*}} 0
- // CHECK-DXIL: insertelement <4 x i32> poison, i32 {{.*}}, i32 0
- // CHECK-DXIL: extractvalue { i32, i32, i32, i32 } {{.*}} 1
- // CHECK-DXIL: insertelement <4 x i32> {{.*}}, i32 {{.*}}, i32 1
- // CHECK-DXIL: extractvalue { i32, i32, i32, i32 } {{.*}} 2
- // CHECK-DXIL: insertelement <4 x i32> {{.*}}, i32 {{.*}}, i32 2
- // CHECK-DXIL: extractvalue { i32, i32, i32, i32 } {{.*}} 3
- // CHECK-DXIL: %[[DXILRET:.*]] = insertelement <4 x i32> {{.*}}, i32 {{.*}}, i32 3
- // CHECK-DXIL: ret <4 x i32> %[[DXILRET]]
+ // CHECK-DXIL-NEXT: extractvalue { i32, i32, i32, i32 } {{.*}} 0
+ // CHECK-DXIL-NEXT: insertelement <4 x i32> poison, i32 {{.*}}, i32 0
+ // CHECK-DXIL-NEXT: extractvalue { i32, i32, i32, i32 } {{.*}} 1
+ // CHECK-DXIL-NEXT: insertelement <4 x i32> {{.*}}, i32 {{.*}}, i32 1
+ // CHECK-DXIL-NEXT: extractvalue { i32, i32, i32, i32 } {{.*}} 2
+ // CHECK-DXIL-NEXT: insertelement <4 x i32> {{.*}}, i32 {{.*}}, i32 2
+ // CHECK-DXIL-NEXT: extractvalue { i32, i32, i32, i32 } {{.*}} 3
+ // CHECK-DXIL-NEXT: %[[DXILRET:.*]] = insertelement <4 x i32> {{.*}}, i32 {{.*}}, i32 3
+ // CHECK-DXIL-NEXT: ret <4 x i32> %[[DXILRET]]
// CHECK-SPIRV: ret <4 x i32> %[[SPIRVRET]]
return WaveActiveBallot(p1);
diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/WaveActiveBallot.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/WaveActiveBallot.ll
index 6831888f038fd..e38d77360631b 100644
--- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/WaveActiveBallot.ll
+++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/WaveActiveBallot.ll
@@ -13,10 +13,10 @@ entry:
; CHECK: %[[#param:]] = OpFunctionParameter %[[#bool]]
; CHECK: %{{.+}} = OpGroupNonUniformBallot %[[#bitmask]] %[[#scope]] %[[#param]]
%0 = call token @llvm.experimental.convergence.entry()
- %ret = call <4 x i32> @llvm.spv.wave.ballot(i1 %p1) [ "convergencectrl"(token %0) ]
+ %ret = call <4 x i32> @llvm.spv.subgroup.ballot(i1 %p1) [ "convergencectrl"(token %0) ]
ret <4 x i32> %ret
}
-declare <4 x i32> @llvm.spv.wave.ballot(i1) #0
+declare <4 x i32> @llvm.spv.subgroup.ballot(i1) #0
attributes #0 = { convergent }
>From b64391b08af0e15bd243104f51c34090f2a4d3fe Mon Sep 17 00:00:00 2001
From: Joshua Batista <jbatista at microsoft.com>
Date: Tue, 13 Jan 2026 13:13:06 -0800
Subject: [PATCH 09/10] fix one failing test
---
clang/test/CodeGenSPIRV/Builtins/subgroup.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clang/test/CodeGenSPIRV/Builtins/subgroup.c b/clang/test/CodeGenSPIRV/Builtins/subgroup.c
index 2ae2013c3c23e..ce5a0bb9c6a4d 100644
--- a/clang/test/CodeGenSPIRV/Builtins/subgroup.c
+++ b/clang/test/CodeGenSPIRV/Builtins/subgroup.c
@@ -9,7 +9,7 @@ typedef unsigned __attribute__((ext_vector_type(4))) int4;
// CHECK: @{{.*}}test_subgroup_shuffle{{.*}}(
// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: tail call <4 x i32> @llvm.spv.wave.ballot(i1 %i)
+// CHECK-NEXT: tail call <4 x i32> @llvm.spv.subgroup.ballot(i1 %i)
[[clang::sycl_external]] int4 test_subgroup_shuffle(_Bool i) {
return __builtin_spirv_subgroup_ballot(i);
}
>From 0ff3fa98579f569889a33bd2a912851305dcdfd6 Mon Sep 17 00:00:00 2001
From: Joshua Batista <jbatista at microsoft.com>
Date: Tue, 13 Jan 2026 14:32:50 -0800
Subject: [PATCH 10/10] fix other failing test
---
clang/test/CodeGenSPIRV/Builtins/subgroup.c | 2 +-
clang/test/Headers/gpuintrin.c | 4 ++--
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/clang/test/CodeGenSPIRV/Builtins/subgroup.c b/clang/test/CodeGenSPIRV/Builtins/subgroup.c
index 531698b51235d..ba6b48e3f3848 100644
--- a/clang/test/CodeGenSPIRV/Builtins/subgroup.c
+++ b/clang/test/CodeGenSPIRV/Builtins/subgroup.c
@@ -10,7 +10,7 @@ typedef unsigned __attribute__((ext_vector_type(4))) int4;
// CHECK: @{{.*}}test_subgroup_ballot{{.*}}(
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: tail call <4 x i32> @llvm.spv.subgroup.ballot(i1 %i)
-[[clang::sycl_external]] int4 test_subgroup_shuffle(_Bool i) {
+[[clang::sycl_external]] int4 test_subgroup_ballot(_Bool i) {
return __builtin_spirv_subgroup_ballot(i);
}
diff --git a/clang/test/Headers/gpuintrin.c b/clang/test/Headers/gpuintrin.c
index c8fe721c8c37c..891a5abf7a72a 100644
--- a/clang/test/Headers/gpuintrin.c
+++ b/clang/test/Headers/gpuintrin.c
@@ -1267,7 +1267,7 @@ __gpu_kernel void foo() {
// SPIRV-NEXT: [[ENTRY:.*:]]
// SPIRV-NEXT: [[__MASK:%.*]] = alloca <4 x i32>, align 16
// SPIRV-NEXT: [[REF_TMP:%.*]] = alloca <2 x i32>, align 8
-// SPIRV-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.spv.wave.ballot(i1 true)
+// SPIRV-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.spv.subgroup.ballot(i1 true)
// SPIRV-NEXT: store <4 x i32> [[TMP0]], ptr [[__MASK]], align 16
// SPIRV-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[__MASK]], align 16
// SPIRV-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[__MASK]], align 16
@@ -1335,7 +1335,7 @@ __gpu_kernel void foo() {
// SPIRV-NEXT: store i8 [[STOREDV]], ptr [[__X_ADDR]], align 1
// SPIRV-NEXT: [[TMP0:%.*]] = load i8, ptr [[__X_ADDR]], align 1
// SPIRV-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP0]] to i1
-// SPIRV-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.spv.wave.ballot(i1 [[LOADEDV]])
+// SPIRV-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.spv.subgroup.ballot(i1 [[LOADEDV]])
// SPIRV-NEXT: store <4 x i32> [[TMP1]], ptr [[__MASK]], align 16
// SPIRV-NEXT: [[TMP2:%.*]] = load i64, ptr [[__LANE_MASK_ADDR]], align 8
// SPIRV-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[__MASK]], align 16
More information about the cfe-commits mailing list