[clang] [llvm] [HLSL] Add WaveActiveBitXor function (PR #185776)
Joshua Batista via cfe-commits
cfe-commits at lists.llvm.org
Wed Mar 11 10:09:56 PDT 2026
https://github.com/bob80905 updated https://github.com/llvm/llvm-project/pull/185776
>From f6032f40ac4244b297442bf2c91f2d4514273c14 Mon Sep 17 00:00:00 2001
From: Joshua Batista <jbatista at microsoft.com>
Date: Tue, 10 Mar 2026 16:27:54 -0700
Subject: [PATCH 1/2] add wave active bit xor
---
clang/include/clang/Basic/Builtins.td | 6 ++
clang/lib/CodeGen/CGHLSLBuiltins.cpp | 13 ++-
clang/lib/CodeGen/CGHLSLRuntime.h | 1 +
.../lib/Headers/hlsl/hlsl_alias_intrinsics.h | 34 ++++++++
clang/lib/Sema/SemaHLSL.cpp | 1 +
.../builtins/WaveActiveBitXor-errors.hlsl | 23 ++++++
.../builtins/WaveActiveBitXor.hlsl | 82 +++++++++++++++++++
llvm/include/llvm/IR/IntrinsicsDirectX.td | 1 +
llvm/include/llvm/IR/IntrinsicsSPIRV.td | 1 +
llvm/lib/Target/DirectX/DXIL.td | 6 +-
llvm/lib/Target/DirectX/DXILShaderFlags.cpp | 1 +
.../DirectX/DirectXTargetTransformInfo.cpp | 1 +
.../Target/SPIRV/SPIRVInstructionSelector.cpp | 3 +
.../CodeGen/DirectX/ShaderFlags/wave-ops.ll | 7 ++
llvm/test/CodeGen/DirectX/WaveActiveBitXor.ll | 19 +++++
.../SPIRV/hlsl-intrinsics/WaveActiveBitXor.ll | 32 ++++++++
16 files changed, 229 insertions(+), 2 deletions(-)
create mode 100644 clang/test/CodeGenHLSL/builtins/WaveActiveBitXor-errors.hlsl
create mode 100644 clang/test/CodeGenHLSL/builtins/WaveActiveBitXor.hlsl
create mode 100644 llvm/test/CodeGen/DirectX/WaveActiveBitXor.ll
create mode 100644 llvm/test/CodeGen/SPIRV/hlsl-intrinsics/WaveActiveBitXor.ll
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index dd5bd689c08d2..d7ab459091110 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -5180,6 +5180,12 @@ def HLSLWaveActiveBitOr : LangBuiltin<"HLSL_LANG"> {
let Prototype = "void (...)";
}
+def HLSLWaveActiveBitXor : LangBuiltin<"HLSL_LANG"> {
+ let Spellings = ["__builtin_hlsl_wave_active_bit_xor"];
+ let Attributes = [NoThrow, Const];
+ let Prototype = "void (...)";
+}
+
def HLSLWaveActiveBallot : LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_wave_active_ballot"];
let Attributes = [NoThrow, Const];
diff --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
index a5db9d8562662..ecc5b7b306266 100644
--- a/clang/lib/CodeGen/CGHLSLBuiltins.cpp
+++ b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
@@ -1214,7 +1214,7 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
case Builtin::BI__builtin_hlsl_wave_active_bit_or: {
Value *Op = EmitScalarExpr(E->getArg(0));
assert(E->getArg(0)->getType()->hasUnsignedIntegerRepresentation() &&
- "Intrinsic WaveActiveBitOr operand must have a unsigned integer "
+ "Intrinsic WaveActiveBitOr operand must have an unsigned integer "
"representation");
Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveBitOrIntrinsic();
@@ -1222,6 +1222,17 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
&CGM.getModule(), ID, {Op->getType()}),
ArrayRef{Op}, "hlsl.wave.active.bit.or");
}
+ case Builtin::BI__builtin_hlsl_wave_active_bit_xor: {
+ Value *Op = EmitScalarExpr(E->getArg(0));
+ assert(E->getArg(0)->getType()->hasUnsignedIntegerRepresentation() &&
+ "Intrinsic WaveActiveBitXor operand must have an unsigned integer "
+ "representation");
+
+ Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveBitXorIntrinsic();
+ return EmitRuntimeCall(Intrinsic::getOrInsertDeclaration(
+ &CGM.getModule(), ID, {Op->getType()}),
+ ArrayRef{Op}, "hlsl.wave.active.bit.xor");
+ }
case Builtin::BI__builtin_hlsl_wave_active_ballot: {
[[maybe_unused]] Value *Op = EmitScalarExpr(E->getArg(0));
assert(Op->getType()->isIntegerTy(1) &&
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h
index 4d3e089ca7140..bb615b0859633 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.h
+++ b/clang/lib/CodeGen/CGHLSLRuntime.h
@@ -148,6 +148,7 @@ class CGHLSLRuntime {
GENERATE_HLSL_INTRINSIC_FUNCTION(WaveActiveAllTrue, wave_all)
GENERATE_HLSL_INTRINSIC_FUNCTION(WaveActiveAnyTrue, wave_any)
GENERATE_HLSL_INTRINSIC_FUNCTION(WaveActiveBitOr, wave_reduce_or)
+ GENERATE_HLSL_INTRINSIC_FUNCTION(WaveActiveBitXor, wave_reduce_xor)
GENERATE_HLSL_INTRINSIC_FUNCTION(WaveActiveMax, wave_reduce_max)
GENERATE_HLSL_INTRINSIC_FUNCTION(WaveActiveUMax, wave_reduce_umax)
GENERATE_HLSL_INTRINSIC_FUNCTION(WaveActiveMin, wave_reduce_min)
diff --git a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h
index 440bb5533c278..57d1e3bddefca 100644
--- a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h
+++ b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h
@@ -2809,6 +2809,40 @@ _HLSL_AVAILABILITY(shadermodel, 6.0)
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_bit_or)
__attribute__((convergent)) uint64_t4 WaveActiveBitOr(uint64_t4);
+//===----------------------------------------------------------------------===//
+// WaveActiveBitXor builtins
+//===----------------------------------------------------------------------===//
+
+// \brief Returns the bitwise XOR of all the values of <expr> across all active
+// non-helper lanes in the current wave, and replicates it back to
+// all active non-helper lanes.
+
+_HLSL_AVAILABILITY(shadermodel, 6.0)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_bit_xor)
+__attribute__((convergent)) uint WaveActiveBitXor(uint);
+_HLSL_AVAILABILITY(shadermodel, 6.0)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_bit_xor)
+__attribute__((convergent)) uint2 WaveActiveBitXor(uint2);
+_HLSL_AVAILABILITY(shadermodel, 6.0)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_bit_xor)
+__attribute__((convergent)) uint3 WaveActiveBitXor(uint3);
+_HLSL_AVAILABILITY(shadermodel, 6.0)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_bit_xor)
+__attribute__((convergent)) uint4 WaveActiveBitXor(uint4);
+
+_HLSL_AVAILABILITY(shadermodel, 6.0)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_bit_xor)
+__attribute__((convergent)) uint64_t WaveActiveBitXor(uint64_t);
+_HLSL_AVAILABILITY(shadermodel, 6.0)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_bit_xor)
+__attribute__((convergent)) uint64_t2 WaveActiveBitXor(uint64_t2);
+_HLSL_AVAILABILITY(shadermodel, 6.0)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_bit_xor)
+__attribute__((convergent)) uint64_t3 WaveActiveBitXor(uint64_t3);
+_HLSL_AVAILABILITY(shadermodel, 6.0)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_bit_xor)
+__attribute__((convergent)) uint64_t4 WaveActiveBitXor(uint64_t4);
+
//===----------------------------------------------------------------------===//
// WaveActiveMax builtins
//===----------------------------------------------------------------------===//
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index f4423862d49a0..4742898586a52 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -4012,6 +4012,7 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
TheCall->setType(ArgTyExpr);
break;
}
+ case Builtin::BI__builtin_hlsl_wave_active_bit_xor:
case Builtin::BI__builtin_hlsl_wave_active_bit_or: {
if (SemaRef.checkArgCount(TheCall, 1))
return true;
diff --git a/clang/test/CodeGenHLSL/builtins/WaveActiveBitXor-errors.hlsl b/clang/test/CodeGenHLSL/builtins/WaveActiveBitXor-errors.hlsl
new file mode 100644
index 0000000000000..1e3d070933dea
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/WaveActiveBitXor-errors.hlsl
@@ -0,0 +1,23 @@
+// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -emit-llvm-only -disable-llvm-passes -verify
+
+uint test_too_few_arg() {
+ return __builtin_hlsl_wave_active_bit_xor();
+ // expected-error@-1 {{too few arguments to function call, expected 1, have 0}}
+}
+
+uint test_too_many_arg(uint p0) {
+ return __builtin_hlsl_wave_active_bit_xor(p0, p0);
+ // expected-error@-1 {{too many arguments to function call, expected 1, have 2}}
+}
+
+struct S { uint x; };
+
+uint test_expr_struct_type_check(S p0) {
+ return __builtin_hlsl_wave_active_bit_xor(p0);
+ // expected-error@-1 {{invalid operand of type 'S' where a scalar or vector is required}}
+}
+
+bool test_expr_bool_type_check(bool p0) {
+ return __builtin_hlsl_wave_active_bit_xor(p0);
+ // expected-error@-1 {{invalid operand of type 'bool'}}
+}
\ No newline at end of file
diff --git a/clang/test/CodeGenHLSL/builtins/WaveActiveBitXor.hlsl b/clang/test/CodeGenHLSL/builtins/WaveActiveBitXor.hlsl
new file mode 100644
index 0000000000000..0c27a569b7b5d
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/WaveActiveBitXor.hlsl
@@ -0,0 +1,82 @@
+// RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -triple \
+// RUN: dxil-pc-shadermodel6.3-compute %s -emit-llvm -disable-llvm-passes -o - | \
+// RUN: FileCheck %s --check-prefixes=CHECK,DXCHECK -DCALL="call"
+
+// RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -triple \
+// RUN: spirv-pc-vulkan-compute %s -emit-llvm -disable-llvm-passes -o - | \
+// RUN: FileCheck %s --check-prefixes=CHECK,SPVCHECK -DCALL="call spir_func"
+
+// Test basic lowering to runtime function call.
+
+// CHECK-LABEL: test_uint
+uint test_uint(uint expr) {
+ // DXCHECK: %[[RET:.*]] = [[CALL]] [[TY:.*]] @llvm.[[ICF:dx]].wave.reduce.xor.i32([[TY]] %[[#]])
+ // SPVCHECK: %[[RET:.*]] = [[CALL]] [[TY:.*]] @llvm.[[ICF:spv]].wave.reduce.xor.i32([[TY]] %[[#]])
+ // CHECK: ret [[TY]] %[[RET]]
+ return WaveActiveBitXor(expr);
+}
+
+// CHECK: declare [[TY]] @llvm.[[ICF]].wave.reduce.xor.i32([[TY]]) #[[#attr:]]
+
+// CHECK-LABEL: test_uint2
+uint2 test_uint2(uint2 expr) {
+ // CHECK: %[[RET:.*]] = [[CALL]] [[TY:.*]] @llvm.[[ICF]].wave.reduce.xor.v2i32([[TY]] %[[#]])
+ // CHECK: ret [[TY]] %[[RET]]
+ return WaveActiveBitXor(expr);
+}
+
+// CHECK: declare [[TY]] @llvm.[[ICF]].wave.reduce.xor.v2i32([[TY]]) #[[#attr:]]
+
+// CHECK-LABEL: test_uint3
+uint3 test_uint3(uint3 expr) {
+ // CHECK: %[[RET:.*]] = [[CALL]] [[TY:.*]] @llvm.[[ICF]].wave.reduce.xor.v3i32([[TY]] %[[#]])
+ // CHECK: ret [[TY]] %[[RET]]
+ return WaveActiveBitXor(expr);
+}
+
+// CHECK: declare [[TY]] @llvm.[[ICF]].wave.reduce.xor.v3i32([[TY]]) #[[#attr:]]
+
+// CHECK-LABEL: test_uint4
+uint4 test_uint4(uint4 expr) {
+ // CHECK: %[[RET:.*]] = [[CALL]] [[TY:.*]] @llvm.[[ICF]].wave.reduce.xor.v4i32([[TY]] %[[#]])
+ // CHECK: ret [[TY]] %[[RET]]
+ return WaveActiveBitXor(expr);
+}
+
+// CHECK: declare [[TY]] @llvm.[[ICF]].wave.reduce.xor.v4i32([[TY]]) #[[#attr:]]
+
+// CHECK-LABEL: test_uint64_t
+uint64_t test_uint64_t(uint64_t expr) {
+ // CHECK: %[[RET:.*]] = [[CALL]] [[TY:.*]] @llvm.[[ICF]].wave.reduce.xor.i64([[TY]] %[[#]])
+ // CHECK: ret [[TY]] %[[RET]]
+ return WaveActiveBitXor(expr);
+}
+
+// CHECK: declare [[TY]] @llvm.[[ICF]].wave.reduce.xor.i64([[TY]]) #[[#attr:]]
+
+// CHECK-LABEL: test_uint64_t2
+uint64_t2 test_uint64_t2(uint64_t2 expr) {
+ // CHECK: %[[RET:.*]] = [[CALL]] [[TY:.*]] @llvm.[[ICF]].wave.reduce.xor.v2i64([[TY]] %[[#]])
+ // CHECK: ret [[TY]] %[[RET]]
+ return WaveActiveBitXor(expr);
+}
+
+// CHECK: declare [[TY]] @llvm.[[ICF]].wave.reduce.xor.v2i64([[TY]]) #[[#attr:]]
+
+// CHECK-LABEL: test_uint64_t3
+uint64_t3 test_uint64_t3(uint64_t3 expr) {
+ // CHECK: %[[RET:.*]] = [[CALL]] [[TY:.*]] @llvm.[[ICF]].wave.reduce.xor.v3i64([[TY]] %[[#]])
+ // CHECK: ret [[TY]] %[[RET]]
+ return WaveActiveBitXor(expr);
+}
+
+// CHECK: declare [[TY]] @llvm.[[ICF]].wave.reduce.xor.v3i64([[TY]]) #[[#attr:]]
+
+// CHECK-LABEL: test_uint64_t4
+uint64_t4 test_uint64_t4(uint64_t4 expr) {
+ // CHECK: %[[RET:.*]] = [[CALL]] [[TY:.*]] @llvm.[[ICF]].wave.reduce.xor.v4i64([[TY]] %[[#]])
+ // CHECK: ret [[TY]] %[[RET]]
+ return WaveActiveBitXor(expr);
+}
+
+// CHECK: declare [[TY]] @llvm.[[ICF]].wave.reduce.xor.v4i64([[TY]]) #[[#attr:]]
\ No newline at end of file
diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td
index 29d0f4d7e46cf..114ec5f1b9f69 100644
--- a/llvm/include/llvm/IR/IntrinsicsDirectX.td
+++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -231,6 +231,7 @@ def int_dx_wave_any : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_i1_ty], [IntrCon
def int_dx_wave_ballot : DefaultAttrsIntrinsic<[llvm_anyint_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [llvm_i1_ty], [IntrConvergent, IntrNoMem]>;
def int_dx_wave_getlaneindex : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrConvergent, IntrNoMem]>;
def int_dx_wave_reduce_or : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem]>;
+def int_dx_wave_reduce_xor : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem]>;
def int_dx_wave_reduce_max : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem]>;
def int_dx_wave_reduce_umax : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem]>;
def int_dx_wave_reduce_min : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem]>;
diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
index 6777ddef292d7..923418739e550 100644
--- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td
+++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
@@ -124,6 +124,7 @@ def int_spv_rsqrt : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty]
def int_spv_wave_all : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_i1_ty], [IntrConvergent, IntrNoMem]>;
def int_spv_wave_any : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_i1_ty], [IntrConvergent, IntrNoMem]>;
def int_spv_wave_reduce_or : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem]>;
+ def int_spv_wave_reduce_xor : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem]>;
def int_spv_subgroup_ballot : ClangBuiltin<"__builtin_spirv_subgroup_ballot">,
DefaultAttrsIntrinsic<[llvm_v4i32_ty], [llvm_i1_ty], [IntrConvergent, IntrNoMem]>;
def int_spv_wave_reduce_umax : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem]>;
diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index 1dc3f34e152e2..9bc26a79d2d26 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -1152,7 +1152,11 @@ def WaveActiveBit : DXILOp<120, waveActiveBit> {
let intrinsics = [
IntrinSelect<int_dx_wave_reduce_or,
[
- IntrinArgIndex<0>, IntrinArgI8<WaveBitOpKind_Or>,
+ IntrinArgIndex<0>, IntrinArgI8<WaveBitOpKind_Or>,
+ ]>,
+ IntrinSelect<int_dx_wave_reduce_xor,
+ [
+ IntrinArgIndex<0>, IntrinArgI8<WaveBitOpKind_Xor>,
]>,
];
diff --git a/llvm/lib/Target/DirectX/DXILShaderFlags.cpp b/llvm/lib/Target/DirectX/DXILShaderFlags.cpp
index b9541daeead31..50d08b3a66dc1 100644
--- a/llvm/lib/Target/DirectX/DXILShaderFlags.cpp
+++ b/llvm/lib/Target/DirectX/DXILShaderFlags.cpp
@@ -92,6 +92,7 @@ static bool checkWaveOps(Intrinsic::ID IID) {
case Intrinsic::dx_wave_prefix_bit_count:
// Wave Active Op Variants
case Intrinsic::dx_wave_reduce_or:
+ case Intrinsic::dx_wave_reduce_xor:
case Intrinsic::dx_wave_reduce_sum:
case Intrinsic::dx_wave_reduce_usum:
case Intrinsic::dx_wave_product:
diff --git a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp
index a36ca97dba2be..7c1ef63a2d21c 100644
--- a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp
+++ b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp
@@ -63,6 +63,7 @@ bool DirectXTTIImpl::isTargetIntrinsicTriviallyScalarizable(
case Intrinsic::dx_wave_all_equal:
case Intrinsic::dx_wave_readlane:
case Intrinsic::dx_wave_reduce_or:
+ case Intrinsic::dx_wave_reduce_xor:
case Intrinsic::dx_wave_reduce_max:
case Intrinsic::dx_wave_reduce_min:
case Intrinsic::dx_wave_reduce_sum:
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index 002ee0d6e13a8..518c12f6685b6 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -4258,6 +4258,9 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg,
case Intrinsic::spv_wave_reduce_or:
return selectWaveReduceOp(ResVReg, ResType, I,
SPIRV::OpGroupNonUniformBitwiseOr);
+ case Intrinsic::spv_wave_reduce_xor:
+ return selectWaveReduceOp(ResVReg, ResType, I,
+ SPIRV::OpGroupNonUniformBitwiseXor);
case Intrinsic::spv_wave_reduce_umax:
return selectWaveReduceMax(ResVReg, ResType, I, /*IsUnsigned*/ true);
case Intrinsic::spv_wave_reduce_max:
diff --git a/llvm/test/CodeGen/DirectX/ShaderFlags/wave-ops.ll b/llvm/test/CodeGen/DirectX/ShaderFlags/wave-ops.ll
index c42bf2d5fdf17..55db9a391a2bc 100644
--- a/llvm/test/CodeGen/DirectX/ShaderFlags/wave-ops.ll
+++ b/llvm/test/CodeGen/DirectX/ShaderFlags/wave-ops.ll
@@ -49,6 +49,13 @@ entry:
ret i32 %ret
}
+define noundef i32 @wave_bit_xor(i32 %x) {
+entry:
+ ; CHECK: Function wave_bit_xor : [[WAVE_FLAG]]
+ %ret = call i32 @llvm.dx.wave.reduce.xor(i32 %x)
+ ret i32 %ret
+}
+
define noundef i1 @wave_all_equal(i1 %x) {
entry:
; CHECK: Function wave_all_equal : [[WAVE_FLAG]]
diff --git a/llvm/test/CodeGen/DirectX/WaveActiveBitXor.ll b/llvm/test/CodeGen/DirectX/WaveActiveBitXor.ll
new file mode 100644
index 0000000000000..a17564f2bc0b4
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/WaveActiveBitXor.ll
@@ -0,0 +1,19 @@
+; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-compute %s | FileCheck %s
+
+define noundef i32 @wave_bitxor_simple(i32 noundef %p1) {
+entry:
+; CHECK: call i32 @dx.op.waveActiveBit.i32(i32 120, i32 %p1, i8 2){{$}}
+ %ret = call i32 @llvm.dx.wave.reduce.xor.i32(i32 %p1)
+ ret i32 %ret
+}
+
+declare i32 @llvm.dx.wave.reduce.xor.i32(i32)
+
+define noundef i64 @wave_bitxor_simple64(i64 noundef %p1) {
+entry:
+; CHECK: call i64 @dx.op.waveActiveBit.i64(i32 120, i64 %p1, i8 2){{$}}
+ %ret = call i64 @llvm.dx.wave.reduce.xor.i64(i64 %p1)
+ ret i64 %ret
+}
+
+declare i64 @llvm.dx.wave.reduce.xor.i64(i64)
\ No newline at end of file
diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/WaveActiveBitXor.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/WaveActiveBitXor.ll
new file mode 100644
index 0000000000000..3b8ff3fadc402
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/WaveActiveBitXor.ll
@@ -0,0 +1,32 @@
+; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-vulkan-unknown %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan %s -o - -filetype=obj | spirv-val --target-env spv1.4 %}
+
+; Test lowering to spir-v backend for various types and scalar/vector
+
+; CHECK: OpCapability GroupNonUniformArithmetic
+
+; CHECK-DAG: %[[#uint:]] = OpTypeInt 32 0
+; CHECK-DAG: %[[#uint64:]] = OpTypeInt 64 0
+; CHECK-DAG: %[[#scope:]] = OpConstant %[[#uint]] 3
+
+; CHECK-LABEL: Begin function test_uint
+; CHECK: %[[#iexpr:]] = OpFunctionParameter %[[#uint]]
+define i32 @test_uint(i32 %iexpr) {
+entry:
+; CHECK: %[[#iret:]] = OpGroupNonUniformBitwiseXor %[[#uint]] %[[#scope]] Reduce %[[#iexpr]]
+ %0 = call i32 @llvm.spv.wave.reduce.xor.i32(i32 %iexpr)
+ ret i32 %0
+}
+
+declare i32 @llvm.spv.wave.reduce.xor.i32(i32)
+
+; CHECK-LABEL: Begin function test_uint64
+; CHECK: %[[#iexpr64:]] = OpFunctionParameter %[[#uint64]]
+define i64 @test_uint64(i64 %iexpr64) {
+entry:
+; CHECK: %[[#iret:]] = OpGroupNonUniformBitwiseXor %[[#uint64]] %[[#scope]] Reduce %[[#iexpr64]]
+ %0 = call i64 @llvm.spv.wave.reduce.xor.i64(i64 %iexpr64)
+ ret i64 %0
+}
+
+declare i64 @llvm.spv.wave.reduce.xor.i64(i64)
>From bb40c4a55b9bf1ca10e15236f6141aab184d8d51 Mon Sep 17 00:00:00 2001
From: Joshua Batista <jbatista at microsoft.com>
Date: Tue, 10 Mar 2026 17:26:47 -0700
Subject: [PATCH 2/2] self review, missing new lines at eof
---
clang/test/CodeGenHLSL/builtins/WaveActiveBitXor-errors.hlsl | 2 +-
clang/test/CodeGenHLSL/builtins/WaveActiveBitXor.hlsl | 2 +-
llvm/test/CodeGen/DirectX/WaveActiveBitXor.ll | 2 +-
3 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/clang/test/CodeGenHLSL/builtins/WaveActiveBitXor-errors.hlsl b/clang/test/CodeGenHLSL/builtins/WaveActiveBitXor-errors.hlsl
index 1e3d070933dea..b5870fb61195d 100644
--- a/clang/test/CodeGenHLSL/builtins/WaveActiveBitXor-errors.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/WaveActiveBitXor-errors.hlsl
@@ -20,4 +20,4 @@ uint test_expr_struct_type_check(S p0) {
bool test_expr_bool_type_check(bool p0) {
return __builtin_hlsl_wave_active_bit_xor(p0);
// expected-error@-1 {{invalid operand of type 'bool'}}
-}
\ No newline at end of file
+}
diff --git a/clang/test/CodeGenHLSL/builtins/WaveActiveBitXor.hlsl b/clang/test/CodeGenHLSL/builtins/WaveActiveBitXor.hlsl
index 0c27a569b7b5d..9c94663390843 100644
--- a/clang/test/CodeGenHLSL/builtins/WaveActiveBitXor.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/WaveActiveBitXor.hlsl
@@ -79,4 +79,4 @@ uint64_t4 test_uint64_t4(uint64_t4 expr) {
return WaveActiveBitXor(expr);
}
-// CHECK: declare [[TY]] @llvm.[[ICF]].wave.reduce.xor.v4i64([[TY]]) #[[#attr:]]
\ No newline at end of file
+// CHECK: declare [[TY]] @llvm.[[ICF]].wave.reduce.xor.v4i64([[TY]]) #[[#attr:]]
diff --git a/llvm/test/CodeGen/DirectX/WaveActiveBitXor.ll b/llvm/test/CodeGen/DirectX/WaveActiveBitXor.ll
index a17564f2bc0b4..26b56718b3e07 100644
--- a/llvm/test/CodeGen/DirectX/WaveActiveBitXor.ll
+++ b/llvm/test/CodeGen/DirectX/WaveActiveBitXor.ll
@@ -16,4 +16,4 @@ entry:
ret i64 %ret
}
-declare i64 @llvm.dx.wave.reduce.xor.i64(i64)
\ No newline at end of file
+declare i64 @llvm.dx.wave.reduce.xor.i64(i64)
More information about the cfe-commits
mailing list