[clang] [llvm] [HLSL] Add WaveActiveBitXor function (PR #185776)

Joshua Batista via cfe-commits cfe-commits at lists.llvm.org
Wed Mar 11 10:09:56 PDT 2026


https://github.com/bob80905 updated https://github.com/llvm/llvm-project/pull/185776

>From f6032f40ac4244b297442bf2c91f2d4514273c14 Mon Sep 17 00:00:00 2001
From: Joshua Batista <jbatista at microsoft.com>
Date: Tue, 10 Mar 2026 16:27:54 -0700
Subject: [PATCH 1/2] add wave active bit xor

---
 clang/include/clang/Basic/Builtins.td         |  6 ++
 clang/lib/CodeGen/CGHLSLBuiltins.cpp          | 13 ++-
 clang/lib/CodeGen/CGHLSLRuntime.h             |  1 +
 .../lib/Headers/hlsl/hlsl_alias_intrinsics.h  | 34 ++++++++
 clang/lib/Sema/SemaHLSL.cpp                   |  1 +
 .../builtins/WaveActiveBitXor-errors.hlsl     | 23 ++++++
 .../builtins/WaveActiveBitXor.hlsl            | 82 +++++++++++++++++++
 llvm/include/llvm/IR/IntrinsicsDirectX.td     |  1 +
 llvm/include/llvm/IR/IntrinsicsSPIRV.td       |  1 +
 llvm/lib/Target/DirectX/DXIL.td               |  6 +-
 llvm/lib/Target/DirectX/DXILShaderFlags.cpp   |  1 +
 .../DirectX/DirectXTargetTransformInfo.cpp    |  1 +
 .../Target/SPIRV/SPIRVInstructionSelector.cpp |  3 +
 .../CodeGen/DirectX/ShaderFlags/wave-ops.ll   |  7 ++
 llvm/test/CodeGen/DirectX/WaveActiveBitXor.ll | 19 +++++
 .../SPIRV/hlsl-intrinsics/WaveActiveBitXor.ll | 32 ++++++++
 16 files changed, 229 insertions(+), 2 deletions(-)
 create mode 100644 clang/test/CodeGenHLSL/builtins/WaveActiveBitXor-errors.hlsl
 create mode 100644 clang/test/CodeGenHLSL/builtins/WaveActiveBitXor.hlsl
 create mode 100644 llvm/test/CodeGen/DirectX/WaveActiveBitXor.ll
 create mode 100644 llvm/test/CodeGen/SPIRV/hlsl-intrinsics/WaveActiveBitXor.ll

diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index dd5bd689c08d2..d7ab459091110 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -5180,6 +5180,12 @@ def HLSLWaveActiveBitOr : LangBuiltin<"HLSL_LANG"> {
   let Prototype = "void (...)";
 }
 
+def HLSLWaveActiveBitXor : LangBuiltin<"HLSL_LANG"> {
+  let Spellings = ["__builtin_hlsl_wave_active_bit_xor"];
+  let Attributes = [NoThrow, Const];
+  let Prototype = "void (...)";
+}
+
 def HLSLWaveActiveBallot : LangBuiltin<"HLSL_LANG"> {
   let Spellings = ["__builtin_hlsl_wave_active_ballot"];
   let Attributes = [NoThrow, Const];
diff --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
index a5db9d8562662..ecc5b7b306266 100644
--- a/clang/lib/CodeGen/CGHLSLBuiltins.cpp
+++ b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
@@ -1214,7 +1214,7 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
   case Builtin::BI__builtin_hlsl_wave_active_bit_or: {
     Value *Op = EmitScalarExpr(E->getArg(0));
     assert(E->getArg(0)->getType()->hasUnsignedIntegerRepresentation() &&
-           "Intrinsic WaveActiveBitOr operand must have a unsigned integer "
+           "Intrinsic WaveActiveBitOr operand must have an unsigned integer "
            "representation");
 
     Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveBitOrIntrinsic();
@@ -1222,6 +1222,17 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
                                &CGM.getModule(), ID, {Op->getType()}),
                            ArrayRef{Op}, "hlsl.wave.active.bit.or");
   }
+  case Builtin::BI__builtin_hlsl_wave_active_bit_xor: {
+    Value *Op = EmitScalarExpr(E->getArg(0));
+    assert(E->getArg(0)->getType()->hasUnsignedIntegerRepresentation() &&
+           "Intrinsic WaveActiveBitXor operand must have an unsigned integer "
+           "representation");
+
+    Intrinsic::ID ID = CGM.getHLSLRuntime().getWaveActiveBitXorIntrinsic();
+    return EmitRuntimeCall(Intrinsic::getOrInsertDeclaration(
+                               &CGM.getModule(), ID, {Op->getType()}),
+                           ArrayRef{Op}, "hlsl.wave.active.bit.xor");
+  }
   case Builtin::BI__builtin_hlsl_wave_active_ballot: {
     [[maybe_unused]] Value *Op = EmitScalarExpr(E->getArg(0));
     assert(Op->getType()->isIntegerTy(1) &&
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h
index 4d3e089ca7140..bb615b0859633 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.h
+++ b/clang/lib/CodeGen/CGHLSLRuntime.h
@@ -148,6 +148,7 @@ class CGHLSLRuntime {
   GENERATE_HLSL_INTRINSIC_FUNCTION(WaveActiveAllTrue, wave_all)
   GENERATE_HLSL_INTRINSIC_FUNCTION(WaveActiveAnyTrue, wave_any)
   GENERATE_HLSL_INTRINSIC_FUNCTION(WaveActiveBitOr, wave_reduce_or)
+  GENERATE_HLSL_INTRINSIC_FUNCTION(WaveActiveBitXor, wave_reduce_xor)
   GENERATE_HLSL_INTRINSIC_FUNCTION(WaveActiveMax, wave_reduce_max)
   GENERATE_HLSL_INTRINSIC_FUNCTION(WaveActiveUMax, wave_reduce_umax)
   GENERATE_HLSL_INTRINSIC_FUNCTION(WaveActiveMin, wave_reduce_min)
diff --git a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h
index 440bb5533c278..57d1e3bddefca 100644
--- a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h
+++ b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h
@@ -2809,6 +2809,40 @@ _HLSL_AVAILABILITY(shadermodel, 6.0)
 _HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_bit_or)
 __attribute__((convergent)) uint64_t4 WaveActiveBitOr(uint64_t4);
 
+//===----------------------------------------------------------------------===//
+// WaveActiveBitXor builtins
+//===----------------------------------------------------------------------===//
+
+// \brief Returns the bitwise XOR of all the values of <expr> across all active
+// non-helper lanes in the current wave, and replicates it back to
+// all active non-helper lanes.
+
+_HLSL_AVAILABILITY(shadermodel, 6.0)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_bit_xor)
+__attribute__((convergent)) uint WaveActiveBitXor(uint);
+_HLSL_AVAILABILITY(shadermodel, 6.0)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_bit_xor)
+__attribute__((convergent)) uint2 WaveActiveBitXor(uint2);
+_HLSL_AVAILABILITY(shadermodel, 6.0)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_bit_xor)
+__attribute__((convergent)) uint3 WaveActiveBitXor(uint3);
+_HLSL_AVAILABILITY(shadermodel, 6.0)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_bit_xor)
+__attribute__((convergent)) uint4 WaveActiveBitXor(uint4);
+
+_HLSL_AVAILABILITY(shadermodel, 6.0)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_bit_xor)
+__attribute__((convergent)) uint64_t WaveActiveBitXor(uint64_t);
+_HLSL_AVAILABILITY(shadermodel, 6.0)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_bit_xor)
+__attribute__((convergent)) uint64_t2 WaveActiveBitXor(uint64_t2);
+_HLSL_AVAILABILITY(shadermodel, 6.0)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_bit_xor)
+__attribute__((convergent)) uint64_t3 WaveActiveBitXor(uint64_t3);
+_HLSL_AVAILABILITY(shadermodel, 6.0)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_wave_active_bit_xor)
+__attribute__((convergent)) uint64_t4 WaveActiveBitXor(uint64_t4);
+
 //===----------------------------------------------------------------------===//
 // WaveActiveMax builtins
 //===----------------------------------------------------------------------===//
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index f4423862d49a0..4742898586a52 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -4012,6 +4012,7 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
     TheCall->setType(ArgTyExpr);
     break;
   }
+  case Builtin::BI__builtin_hlsl_wave_active_bit_xor:
   case Builtin::BI__builtin_hlsl_wave_active_bit_or: {
     if (SemaRef.checkArgCount(TheCall, 1))
       return true;
diff --git a/clang/test/CodeGenHLSL/builtins/WaveActiveBitXor-errors.hlsl b/clang/test/CodeGenHLSL/builtins/WaveActiveBitXor-errors.hlsl
new file mode 100644
index 0000000000000..1e3d070933dea
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/WaveActiveBitXor-errors.hlsl
@@ -0,0 +1,23 @@
+// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -emit-llvm-only -disable-llvm-passes -verify
+
+uint test_too_few_arg() {
+  return __builtin_hlsl_wave_active_bit_xor();
+  // expected-error@-1 {{too few arguments to function call, expected 1, have 0}}
+}
+
+uint test_too_many_arg(uint p0) {
+  return __builtin_hlsl_wave_active_bit_xor(p0, p0);
+  // expected-error@-1 {{too many arguments to function call, expected 1, have 2}}
+}
+
+struct S { uint x; };
+
+uint test_expr_struct_type_check(S p0) {
+  return __builtin_hlsl_wave_active_bit_xor(p0);
+  // expected-error@-1 {{invalid operand of type 'S' where a scalar or vector is required}}
+}
+
+bool test_expr_bool_type_check(bool p0) {
+  return __builtin_hlsl_wave_active_bit_xor(p0);
+  // expected-error@-1 {{invalid operand of type 'bool'}}
+}
\ No newline at end of file
diff --git a/clang/test/CodeGenHLSL/builtins/WaveActiveBitXor.hlsl b/clang/test/CodeGenHLSL/builtins/WaveActiveBitXor.hlsl
new file mode 100644
index 0000000000000..0c27a569b7b5d
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/WaveActiveBitXor.hlsl
@@ -0,0 +1,82 @@
+// RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -triple \
+// RUN:   dxil-pc-shadermodel6.3-compute %s -emit-llvm -disable-llvm-passes -o - | \
+// RUN:   FileCheck %s --check-prefixes=CHECK,DXCHECK -DCALL="call"
+
+// RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -triple \
+// RUN:   spirv-pc-vulkan-compute %s -emit-llvm -disable-llvm-passes -o - | \
+// RUN:   FileCheck %s --check-prefixes=CHECK,SPVCHECK -DCALL="call spir_func"
+
+// Test basic lowering to runtime function call.
+
+// CHECK-LABEL: test_uint
+uint test_uint(uint expr) {
+  // DXCHECK:  %[[RET:.*]] = [[CALL]] [[TY:.*]] @llvm.[[ICF:dx]].wave.reduce.xor.i32([[TY]] %[[#]])
+  // SPVCHECK:  %[[RET:.*]] = [[CALL]] [[TY:.*]] @llvm.[[ICF:spv]].wave.reduce.xor.i32([[TY]] %[[#]])
+  // CHECK:  ret [[TY]] %[[RET]]
+  return WaveActiveBitXor(expr);
+}
+
+// CHECK: declare [[TY]] @llvm.[[ICF]].wave.reduce.xor.i32([[TY]]) #[[#attr:]]
+
+// CHECK-LABEL: test_uint2
+uint2 test_uint2(uint2 expr) {
+  // CHECK:  %[[RET:.*]] = [[CALL]] [[TY:.*]] @llvm.[[ICF]].wave.reduce.xor.v2i32([[TY]] %[[#]])
+  // CHECK:  ret [[TY]] %[[RET]]
+  return WaveActiveBitXor(expr);
+}
+
+// CHECK: declare [[TY]] @llvm.[[ICF]].wave.reduce.xor.v2i32([[TY]]) #[[#attr:]]
+
+// CHECK-LABEL: test_uint3
+uint3 test_uint3(uint3 expr) {
+  // CHECK:  %[[RET:.*]] = [[CALL]] [[TY:.*]] @llvm.[[ICF]].wave.reduce.xor.v3i32([[TY]] %[[#]])
+  // CHECK:  ret [[TY]] %[[RET]]
+  return WaveActiveBitXor(expr);
+}
+
+// CHECK: declare [[TY]] @llvm.[[ICF]].wave.reduce.xor.v3i32([[TY]]) #[[#attr:]]
+
+// CHECK-LABEL: test_uint4
+uint4 test_uint4(uint4 expr) {
+  // CHECK:  %[[RET:.*]] = [[CALL]] [[TY:.*]] @llvm.[[ICF]].wave.reduce.xor.v4i32([[TY]] %[[#]])
+  // CHECK:  ret [[TY]] %[[RET]]
+  return WaveActiveBitXor(expr);
+}
+
+// CHECK: declare [[TY]] @llvm.[[ICF]].wave.reduce.xor.v4i32([[TY]]) #[[#attr:]]
+
+// CHECK-LABEL: test_uint64_t
+uint64_t test_uint64_t(uint64_t expr) {
+  // CHECK:  %[[RET:.*]] = [[CALL]] [[TY:.*]] @llvm.[[ICF]].wave.reduce.xor.i64([[TY]] %[[#]])
+  // CHECK:  ret [[TY]] %[[RET]]
+  return WaveActiveBitXor(expr);
+}
+
+// CHECK: declare [[TY]] @llvm.[[ICF]].wave.reduce.xor.i64([[TY]]) #[[#attr:]]
+
+// CHECK-LABEL: test_uint64_t2
+uint64_t2 test_uint64_t2(uint64_t2 expr) {
+  // CHECK:  %[[RET:.*]] = [[CALL]] [[TY:.*]] @llvm.[[ICF]].wave.reduce.xor.v2i64([[TY]] %[[#]])
+  // CHECK:  ret [[TY]] %[[RET]]
+  return WaveActiveBitXor(expr);
+}
+
+// CHECK: declare [[TY]] @llvm.[[ICF]].wave.reduce.xor.v2i64([[TY]]) #[[#attr:]]
+
+// CHECK-LABEL: test_uint64_t3
+uint64_t3 test_uint64_t3(uint64_t3 expr) {
+  // CHECK:  %[[RET:.*]] = [[CALL]] [[TY:.*]] @llvm.[[ICF]].wave.reduce.xor.v3i64([[TY]] %[[#]])
+  // CHECK:  ret [[TY]] %[[RET]]
+  return WaveActiveBitXor(expr);
+}
+
+// CHECK: declare [[TY]] @llvm.[[ICF]].wave.reduce.xor.v3i64([[TY]]) #[[#attr:]]
+
+// CHECK-LABEL: test_uint64_t4
+uint64_t4 test_uint64_t4(uint64_t4 expr) {
+  // CHECK:  %[[RET:.*]] = [[CALL]] [[TY:.*]] @llvm.[[ICF]].wave.reduce.xor.v4i64([[TY]] %[[#]])
+  // CHECK:  ret [[TY]] %[[RET]]
+  return WaveActiveBitXor(expr);
+}
+
+// CHECK: declare [[TY]] @llvm.[[ICF]].wave.reduce.xor.v4i64([[TY]]) #[[#attr:]]
\ No newline at end of file
diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td
index 29d0f4d7e46cf..114ec5f1b9f69 100644
--- a/llvm/include/llvm/IR/IntrinsicsDirectX.td
+++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -231,6 +231,7 @@ def int_dx_wave_any : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_i1_ty], [IntrCon
 def int_dx_wave_ballot : DefaultAttrsIntrinsic<[llvm_anyint_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [llvm_i1_ty], [IntrConvergent, IntrNoMem]>;
 def int_dx_wave_getlaneindex : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrConvergent, IntrNoMem]>;
 def int_dx_wave_reduce_or : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem]>;
+def int_dx_wave_reduce_xor : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem]>;
 def int_dx_wave_reduce_max : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem]>;
 def int_dx_wave_reduce_umax : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem]>;
 def int_dx_wave_reduce_min : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem]>;
diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
index 6777ddef292d7..923418739e550 100644
--- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td
+++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
@@ -124,6 +124,7 @@ def int_spv_rsqrt : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty]
   def int_spv_wave_all : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_i1_ty], [IntrConvergent, IntrNoMem]>;
   def int_spv_wave_any : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_i1_ty], [IntrConvergent, IntrNoMem]>;
   def int_spv_wave_reduce_or : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem]>;
+  def int_spv_wave_reduce_xor : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem]>;
   def int_spv_subgroup_ballot : ClangBuiltin<"__builtin_spirv_subgroup_ballot">,
     DefaultAttrsIntrinsic<[llvm_v4i32_ty], [llvm_i1_ty], [IntrConvergent, IntrNoMem]>;
   def int_spv_wave_reduce_umax : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem]>;
diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index 1dc3f34e152e2..9bc26a79d2d26 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -1152,7 +1152,11 @@ def WaveActiveBit : DXILOp<120, waveActiveBit> {
   let intrinsics = [
     IntrinSelect<int_dx_wave_reduce_or,
                  [
-                   IntrinArgIndex<0>, IntrinArgI8<WaveBitOpKind_Or>,
+                   IntrinArgIndex<0>, IntrinArgI8<WaveBitOpKind_Or>,
+                 ]>,
+    IntrinSelect<int_dx_wave_reduce_xor,
+                 [
+                   IntrinArgIndex<0>, IntrinArgI8<WaveBitOpKind_Xor>,
                  ]>,
   ];
 
diff --git a/llvm/lib/Target/DirectX/DXILShaderFlags.cpp b/llvm/lib/Target/DirectX/DXILShaderFlags.cpp
index b9541daeead31..50d08b3a66dc1 100644
--- a/llvm/lib/Target/DirectX/DXILShaderFlags.cpp
+++ b/llvm/lib/Target/DirectX/DXILShaderFlags.cpp
@@ -92,6 +92,7 @@ static bool checkWaveOps(Intrinsic::ID IID) {
   case Intrinsic::dx_wave_prefix_bit_count:
   // Wave Active Op Variants
   case Intrinsic::dx_wave_reduce_or:
+  case Intrinsic::dx_wave_reduce_xor:
   case Intrinsic::dx_wave_reduce_sum:
   case Intrinsic::dx_wave_reduce_usum:
   case Intrinsic::dx_wave_product:
diff --git a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp
index a36ca97dba2be..7c1ef63a2d21c 100644
--- a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp
+++ b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp
@@ -63,6 +63,7 @@ bool DirectXTTIImpl::isTargetIntrinsicTriviallyScalarizable(
   case Intrinsic::dx_wave_all_equal:
   case Intrinsic::dx_wave_readlane:
   case Intrinsic::dx_wave_reduce_or:
+  case Intrinsic::dx_wave_reduce_xor:
   case Intrinsic::dx_wave_reduce_max:
   case Intrinsic::dx_wave_reduce_min:
   case Intrinsic::dx_wave_reduce_sum:
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index 002ee0d6e13a8..518c12f6685b6 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -4258,6 +4258,9 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg,
   case Intrinsic::spv_wave_reduce_or:
     return selectWaveReduceOp(ResVReg, ResType, I,
                               SPIRV::OpGroupNonUniformBitwiseOr);
+  case Intrinsic::spv_wave_reduce_xor:
+    return selectWaveReduceOp(ResVReg, ResType, I,
+                              SPIRV::OpGroupNonUniformBitwiseXor);
   case Intrinsic::spv_wave_reduce_umax:
     return selectWaveReduceMax(ResVReg, ResType, I, /*IsUnsigned*/ true);
   case Intrinsic::spv_wave_reduce_max:
diff --git a/llvm/test/CodeGen/DirectX/ShaderFlags/wave-ops.ll b/llvm/test/CodeGen/DirectX/ShaderFlags/wave-ops.ll
index c42bf2d5fdf17..55db9a391a2bc 100644
--- a/llvm/test/CodeGen/DirectX/ShaderFlags/wave-ops.ll
+++ b/llvm/test/CodeGen/DirectX/ShaderFlags/wave-ops.ll
@@ -49,6 +49,13 @@ entry:
   ret i32 %ret
 }
 
+define noundef i32 @wave_bit_xor(i32 %x) {
+entry:
+  ; CHECK: Function wave_bit_xor : [[WAVE_FLAG]]
+  %ret = call i32 @llvm.dx.wave.reduce.xor(i32 %x)
+  ret i32 %ret
+}
+
 define noundef i1 @wave_all_equal(i1 %x) {
 entry:
   ; CHECK: Function wave_all_equal : [[WAVE_FLAG]]
diff --git a/llvm/test/CodeGen/DirectX/WaveActiveBitXor.ll b/llvm/test/CodeGen/DirectX/WaveActiveBitXor.ll
new file mode 100644
index 0000000000000..a17564f2bc0b4
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/WaveActiveBitXor.ll
@@ -0,0 +1,19 @@
+; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-compute %s | FileCheck %s
+
+define noundef i32 @wave_bitxor_simple(i32 noundef %p1) {
+entry:
+; CHECK: call i32 @dx.op.waveActiveBit.i32(i32 120, i32 %p1, i8 2){{$}}
+  %ret = call i32 @llvm.dx.wave.reduce.xor.i32(i32 %p1)
+  ret i32 %ret
+}
+
+declare i32 @llvm.dx.wave.reduce.xor.i32(i32)
+
+define noundef i64 @wave_bitxor_simple64(i64 noundef %p1) {
+entry:
+; CHECK: call i64 @dx.op.waveActiveBit.i64(i32 120, i64 %p1, i8 2){{$}}
+  %ret = call i64 @llvm.dx.wave.reduce.xor.i64(i64 %p1)
+  ret i64 %ret
+}
+
+declare i64 @llvm.dx.wave.reduce.xor.i64(i64)
\ No newline at end of file
diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/WaveActiveBitXor.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/WaveActiveBitXor.ll
new file mode 100644
index 0000000000000..3b8ff3fadc402
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/WaveActiveBitXor.ll
@@ -0,0 +1,32 @@
+; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-vulkan-unknown %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan %s -o - -filetype=obj | spirv-val --target-env spv1.4 %}
+
+; Test lowering to spir-v backend for various types and scalar/vector
+
+; CHECK: OpCapability GroupNonUniformArithmetic
+
+; CHECK-DAG:   %[[#uint:]] = OpTypeInt 32 0
+; CHECK-DAG:   %[[#uint64:]] = OpTypeInt 64 0
+; CHECK-DAG:   %[[#scope:]] = OpConstant %[[#uint]] 3
+
+; CHECK-LABEL: Begin function test_uint
+; CHECK:   %[[#iexpr:]] = OpFunctionParameter %[[#uint]]
+define i32 @test_uint(i32 %iexpr) {
+entry:
+; CHECK:   %[[#iret:]] = OpGroupNonUniformBitwiseXor %[[#uint]] %[[#scope]] Reduce %[[#iexpr]]
+  %0 = call i32 @llvm.spv.wave.reduce.xor.i32(i32 %iexpr)
+  ret i32 %0
+}
+
+declare i32 @llvm.spv.wave.reduce.xor.i32(i32)
+
+; CHECK-LABEL: Begin function test_uint64
+; CHECK:   %[[#iexpr64:]] = OpFunctionParameter %[[#uint64]]
+define i64 @test_uint64(i64 %iexpr64) {
+entry:
+; CHECK:   %[[#iret:]] = OpGroupNonUniformBitwiseXor %[[#uint64]] %[[#scope]] Reduce %[[#iexpr64]]
+  %0 = call i64 @llvm.spv.wave.reduce.xor.i64(i64 %iexpr64)
+  ret i64 %0
+}
+
+declare i64 @llvm.spv.wave.reduce.xor.i64(i64)

>From bb40c4a55b9bf1ca10e15236f6141aab184d8d51 Mon Sep 17 00:00:00 2001
From: Joshua Batista <jbatista at microsoft.com>
Date: Tue, 10 Mar 2026 17:26:47 -0700
Subject: [PATCH 2/2] self review, missing new lines at eof

---
 clang/test/CodeGenHLSL/builtins/WaveActiveBitXor-errors.hlsl | 2 +-
 clang/test/CodeGenHLSL/builtins/WaveActiveBitXor.hlsl        | 2 +-
 llvm/test/CodeGen/DirectX/WaveActiveBitXor.ll                | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/clang/test/CodeGenHLSL/builtins/WaveActiveBitXor-errors.hlsl b/clang/test/CodeGenHLSL/builtins/WaveActiveBitXor-errors.hlsl
index 1e3d070933dea..b5870fb61195d 100644
--- a/clang/test/CodeGenHLSL/builtins/WaveActiveBitXor-errors.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/WaveActiveBitXor-errors.hlsl
@@ -20,4 +20,4 @@ uint test_expr_struct_type_check(S p0) {
 bool test_expr_bool_type_check(bool p0) {
   return __builtin_hlsl_wave_active_bit_xor(p0);
   // expected-error@-1 {{invalid operand of type 'bool'}}
-}
\ No newline at end of file
+}
diff --git a/clang/test/CodeGenHLSL/builtins/WaveActiveBitXor.hlsl b/clang/test/CodeGenHLSL/builtins/WaveActiveBitXor.hlsl
index 0c27a569b7b5d..9c94663390843 100644
--- a/clang/test/CodeGenHLSL/builtins/WaveActiveBitXor.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/WaveActiveBitXor.hlsl
@@ -79,4 +79,4 @@ uint64_t4 test_uint64_t4(uint64_t4 expr) {
   return WaveActiveBitXor(expr);
 }
 
-// CHECK: declare [[TY]] @llvm.[[ICF]].wave.reduce.xor.v4i64([[TY]]) #[[#attr:]]
\ No newline at end of file
+// CHECK: declare [[TY]] @llvm.[[ICF]].wave.reduce.xor.v4i64([[TY]]) #[[#attr:]]
diff --git a/llvm/test/CodeGen/DirectX/WaveActiveBitXor.ll b/llvm/test/CodeGen/DirectX/WaveActiveBitXor.ll
index a17564f2bc0b4..26b56718b3e07 100644
--- a/llvm/test/CodeGen/DirectX/WaveActiveBitXor.ll
+++ b/llvm/test/CodeGen/DirectX/WaveActiveBitXor.ll
@@ -16,4 +16,4 @@ entry:
   ret i64 %ret
 }
 
-declare i64 @llvm.dx.wave.reduce.xor.i64(i64)
\ No newline at end of file
+declare i64 @llvm.dx.wave.reduce.xor.i64(i64)



More information about the cfe-commits mailing list