[clang] [HLSL] Invert the result of `firstbithigh` (PR #166419)
via cfe-commits
cfe-commits at lists.llvm.org
Tue Nov 4 11:27:57 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-hlsl
Author: Deric C. (Icohedron)
<details>
<summary>Changes</summary>
Fixes #<!-- -->145752
This PR inverts the result of `firstbithigh` by subtracting it from integer bitwidth - 1 to match the result from DXC.
Although the original issue is labeled with [DirectX], I assume the result of `firstbithigh` also needs to be inverted for SPIR-V as well because `firstbithigh` is an HLSL function and its behavior should not be different for DirectX. So this PR changes the Clang codegen for both DirectX and SPIR-V. Do let me know if this is incorrect.
This Clang codegen of `firstbithigh` also does not introduce a comparison with `-1` followed by a `select` as DXC does: https://godbolt.org/z/xd6dvsT7M
```llvm
%FirstbitSHi = call i32 @<!-- -->dx.op.unaryBits.i32(i32 34, i32 %i)
%1 = sub i32 31, %FirstbitSHi
%2 = icmp eq i32 %FirstbitSHi, -1 ; not emitted by clang
%3 = select i1 %2, i32 -1, i32 %1 ; not emitted by clang
```
The comparison with `-1` and `select` appears to be a no-op to me.
Let me know if there is a reason DXC does this and if I should add it to Clang's codegen for `firstbithigh`.
---
Full diff: https://github.com/llvm/llvm-project/pull/166419.diff
4 Files Affected:
- (modified) clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h (-72)
- (modified) clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h (+9)
- (modified) clang/lib/Headers/hlsl/hlsl_intrinsics.h (+61)
- (modified) clang/test/CodeGenHLSL/builtins/firstbithigh.hlsl (+54-30)
``````````diff
diff --git a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h
index 4c5861c2c5f9d..c0914914a1262 100644
--- a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h
+++ b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h
@@ -1073,78 +1073,6 @@ float3 f16tof32(uint3);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_f16tof32)
float4 f16tof32(uint4);
-//===----------------------------------------------------------------------===//
-// firstbithigh builtins
-//===----------------------------------------------------------------------===//
-
-/// \fn T firstbithigh(T Val)
-/// \brief Returns the location of the first set bit starting from the highest
-/// order bit and working downward, per component.
-/// \param Val the input value.
-
-#ifdef __HLSL_ENABLE_16_BIT
-_HLSL_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint firstbithigh(int16_t);
-_HLSL_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint2 firstbithigh(int16_t2);
-_HLSL_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint3 firstbithigh(int16_t3);
-_HLSL_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint4 firstbithigh(int16_t4);
-_HLSL_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint firstbithigh(uint16_t);
-_HLSL_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint2 firstbithigh(uint16_t2);
-_HLSL_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint3 firstbithigh(uint16_t3);
-_HLSL_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint4 firstbithigh(uint16_t4);
-#endif
-
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint firstbithigh(int);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint2 firstbithigh(int2);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint3 firstbithigh(int3);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint4 firstbithigh(int4);
-
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint firstbithigh(uint);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint2 firstbithigh(uint2);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint3 firstbithigh(uint3);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint4 firstbithigh(uint4);
-
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint firstbithigh(int64_t);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint2 firstbithigh(int64_t2);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint3 firstbithigh(int64_t3);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint4 firstbithigh(int64_t4);
-
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint firstbithigh(uint64_t);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint2 firstbithigh(uint64_t2);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint3 firstbithigh(uint64_t3);
-_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_firstbithigh)
-uint4 firstbithigh(uint64_t4);
-
//===----------------------------------------------------------------------===//
// firstbitlow builtins
//===----------------------------------------------------------------------===//
diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h b/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h
index c877234479ad1..8560c75016b4f 100644
--- a/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h
+++ b/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h
@@ -148,6 +148,15 @@ template <typename T> constexpr T ldexp_impl(T X, T Exp) {
return exp2(Exp) * X;
}
+template <typename T, int Bitwidth> constexpr uint firstbithigh_impl(T X) {
+ return (Bitwidth - 1) - __builtin_hlsl_elementwise_firstbithigh(X);
+}
+
+template <typename T, int N, int Bitwidth>
+constexpr vector<uint, N> firstbithigh_impl(vector<T, N> X) {
+ return (Bitwidth - 1) - __builtin_hlsl_elementwise_firstbithigh(X);
+}
+
} // namespace __detail
} // namespace hlsl
diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
index 5ba5bfb9abde0..192c3a2c974d9 100644
--- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h
+++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
@@ -261,6 +261,67 @@ faceforward(__detail::HLSL_FIXED_VECTOR<float, L> N,
return __detail::faceforward_impl(N, I, Ng);
}
+//===----------------------------------------------------------------------===//
+// firstbithigh builtins
+//===----------------------------------------------------------------------===//
+
+/// \fn T firstbithigh(T Val)
+/// \brief Returns the location of the first set bit starting from the lowest
+/// order bit and working upward, per component.
+/// \param Val the input value.
+
+#ifdef __HLSL_ENABLE_16_BIT
+
+template <typename T>
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+const inline __detail::enable_if_t<__detail::is_same<int16_t, T>::value ||
+ __detail::is_same<uint16_t, T>::value,
+ uint> firstbithigh(T X) {
+ return __detail::firstbithigh_impl<T, 16>(X);
+}
+
+template <typename T, int N>
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+const
+ inline __detail::enable_if_t<__detail::is_same<int16_t, T>::value ||
+ __detail::is_same<uint16_t, T>::value,
+ vector<uint, N>> firstbithigh(vector<T, N> X) {
+ return __detail::firstbithigh_impl<T, N, 16>(X);
+}
+
+#endif
+
+template <typename T>
+const inline __detail::enable_if_t<
+ __detail::is_same<int, T>::value || __detail::is_same<uint, T>::value, uint>
+firstbithigh(T X) {
+ return __detail::firstbithigh_impl<T, 32>(X);
+}
+
+template <typename T, int N>
+const inline __detail::enable_if_t<__detail::is_same<int, T>::value ||
+ __detail::is_same<uint, T>::value,
+ vector<uint, N>>
+firstbithigh(vector<T, N> X) {
+ return __detail::firstbithigh_impl<T, N, 32>(X);
+}
+
+template <typename T>
+const inline __detail::enable_if_t<__detail::is_same<int64_t, T>::value ||
+ __detail::is_same<uint64_t, T>::value,
+ uint>
+firstbithigh(T X) {
+ return __detail::firstbithigh_impl<T, 64>(X);
+}
+
+template <typename T, int N>
+const inline __detail::enable_if_t<__detail::is_same<int64_t, T>::value ||
+ __detail::is_same<uint64_t, T>::value,
+ vector<uint, N>>
+firstbithigh(vector<T, N> X) {
+ return __detail::firstbithigh_impl<T, N, 64>(X);
+}
+
//===----------------------------------------------------------------------===//
// fmod builtins
//===----------------------------------------------------------------------===//
diff --git a/clang/test/CodeGenHLSL/builtins/firstbithigh.hlsl b/clang/test/CodeGenHLSL/builtins/firstbithigh.hlsl
index 368d652a6f779..c8fa942fa81ff 100644
--- a/clang/test/CodeGenHLSL/builtins/firstbithigh.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/firstbithigh.hlsl
@@ -1,160 +1,184 @@
// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
-// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type -fnative-int16-type \
-// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s -DTARGET=dx
+// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \
+// RUN: -fnative-int16-type -emit-llvm -o - | FileCheck %s -DTARGET=dx
// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
-// RUN: spirv-unknown-vulkan-compute %s -fnative-half-type -fnative-int16-type \
-// RUN: -emit-llvm -disable-llvm-passes \
-// RUN: -o - | FileCheck %s -DTARGET=spv
+// RUN: spirv-unknown-vulkan-compute %s -fnative-half-type \
+// RUN: -fnative-int16-type -emit-llvm -o - | FileCheck %s -DTARGET=spv
#ifdef __HLSL_ENABLE_16_BIT
// CHECK-LABEL: test_firstbithigh_ushort
-// CHECK: call i32 @llvm.[[TARGET]].firstbituhigh.i16
+// CHECK: [[FBH:%.*]] = call i32 @llvm.[[TARGET]].firstbituhigh.i16
+// CHECK: sub i32 15, [[FBH]]
uint test_firstbithigh_ushort(uint16_t p0) {
return firstbithigh(p0);
}
// CHECK-LABEL: test_firstbithigh_ushort2
-// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbituhigh.v2i16
+// CHECK: [[FBH:%.*]] = call <2 x i32> @llvm.[[TARGET]].firstbituhigh.v2i16
+// CHECK: sub <2 x i32> splat (i32 15), [[FBH]]
uint2 test_firstbithigh_ushort2(uint16_t2 p0) {
return firstbithigh(p0);
}
// CHECK-LABEL: test_firstbithigh_ushort3
-// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbituhigh.v3i16
+// CHECK: [[FBH:%.*]] = call <3 x i32> @llvm.[[TARGET]].firstbituhigh.v3i16
+// CHECK: sub <3 x i32> splat (i32 15), [[FBH]]
uint3 test_firstbithigh_ushort3(uint16_t3 p0) {
return firstbithigh(p0);
}
// CHECK-LABEL: test_firstbithigh_ushort4
-// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbituhigh.v4i16
+// CHECK: [[FBH:%.*]] = call <4 x i32> @llvm.[[TARGET]].firstbituhigh.v4i16
+// CHECK: sub <4 x i32> splat (i32 15), [[FBH]]
uint4 test_firstbithigh_ushort4(uint16_t4 p0) {
return firstbithigh(p0);
}
// CHECK-LABEL: test_firstbithigh_short
-// CHECK: call i32 @llvm.[[TARGET]].firstbitshigh.i16
+// CHECK: [[FBH:%.*]] = call i32 @llvm.[[TARGET]].firstbitshigh.i16
+// CHECK: sub i32 15, [[FBH]]
uint test_firstbithigh_short(int16_t p0) {
return firstbithigh(p0);
}
// CHECK-LABEL: test_firstbithigh_short2
-// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitshigh.v2i16
+// CHECK: [[FBH:%.*]] = call <2 x i32> @llvm.[[TARGET]].firstbitshigh.v2i16
+// CHECK: sub <2 x i32> splat (i32 15), [[FBH]]
uint2 test_firstbithigh_short2(int16_t2 p0) {
return firstbithigh(p0);
}
// CHECK-LABEL: test_firstbithigh_short3
-// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitshigh.v3i16
+// CHECK: [[FBH:%.*]] = call <3 x i32> @llvm.[[TARGET]].firstbitshigh.v3i16
+// CHECK: sub <3 x i32> splat (i32 15), [[FBH]]
uint3 test_firstbithigh_short3(int16_t3 p0) {
return firstbithigh(p0);
}
// CHECK-LABEL: test_firstbithigh_short4
-// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitshigh.v4i16
+// CHECK: [[FBH:%.*]] = call <4 x i32> @llvm.[[TARGET]].firstbitshigh.v4i16
+// CHECK: sub <4 x i32> splat (i32 15), [[FBH]]
uint4 test_firstbithigh_short4(int16_t4 p0) {
return firstbithigh(p0);
}
#endif // __HLSL_ENABLE_16_BIT
// CHECK-LABEL: test_firstbithigh_uint
-// CHECK: call i32 @llvm.[[TARGET]].firstbituhigh.i32
+// CHECK: [[FBH:%.*]] = call i32 @llvm.[[TARGET]].firstbituhigh.i32
+// CHECK: sub i32 31, [[FBH]]
uint test_firstbithigh_uint(uint p0) {
return firstbithigh(p0);
}
// CHECK-LABEL: test_firstbithigh_uint2
-// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbituhigh.v2i32
+// CHECK: [[FBH:%.*]] = call <2 x i32> @llvm.[[TARGET]].firstbituhigh.v2i32
+// CHECK: sub <2 x i32> splat (i32 31), [[FBH]]
uint2 test_firstbithigh_uint2(uint2 p0) {
return firstbithigh(p0);
}
// CHECK-LABEL: test_firstbithigh_uint3
-// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbituhigh.v3i32
+// CHECK: [[FBH:%.*]] = call <3 x i32> @llvm.[[TARGET]].firstbituhigh.v3i32
+// CHECK: sub <3 x i32> splat (i32 31), [[FBH]]
uint3 test_firstbithigh_uint3(uint3 p0) {
return firstbithigh(p0);
}
// CHECK-LABEL: test_firstbithigh_uint4
-// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbituhigh.v4i32
+// CHECK: [[FBH:%.*]] = call <4 x i32> @llvm.[[TARGET]].firstbituhigh.v4i32
+// CHECK: sub <4 x i32> splat (i32 31), [[FBH]]
uint4 test_firstbithigh_uint4(uint4 p0) {
return firstbithigh(p0);
}
// CHECK-LABEL: test_firstbithigh_ulong
-// CHECK: call i32 @llvm.[[TARGET]].firstbituhigh.i64
+// CHECK: [[FBH:%.*]] = call i32 @llvm.[[TARGET]].firstbituhigh.i64
+// CHECK: sub i32 63, [[FBH]]
uint test_firstbithigh_ulong(uint64_t p0) {
return firstbithigh(p0);
}
// CHECK-LABEL: test_firstbithigh_ulong2
-// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbituhigh.v2i64
+// CHECK: [[FBH:%.*]] = call <2 x i32> @llvm.[[TARGET]].firstbituhigh.v2i64
+// CHECK: sub <2 x i32> splat (i32 63), [[FBH]]
uint2 test_firstbithigh_ulong2(uint64_t2 p0) {
return firstbithigh(p0);
}
// CHECK-LABEL: test_firstbithigh_ulong3
-// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbituhigh.v3i64
+// CHECK: [[FBH:%.*]] = call <3 x i32> @llvm.[[TARGET]].firstbituhigh.v3i64
+// CHECK: sub <3 x i32> splat (i32 63), [[FBH]]
uint3 test_firstbithigh_ulong3(uint64_t3 p0) {
return firstbithigh(p0);
}
// CHECK-LABEL: test_firstbithigh_ulong4
-// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbituhigh.v4i64
+// CHECK: [[FBH:%.*]] = call <4 x i32> @llvm.[[TARGET]].firstbituhigh.v4i64
+// CHECK: sub <4 x i32> splat (i32 63), [[FBH]]
uint4 test_firstbithigh_ulong4(uint64_t4 p0) {
return firstbithigh(p0);
}
// CHECK-LABEL: test_firstbithigh_int
-// CHECK: call i32 @llvm.[[TARGET]].firstbitshigh.i32
+// CHECK: [[FBH:%.*]] = call i32 @llvm.[[TARGET]].firstbitshigh.i32
+// CHECK: sub i32 31, [[FBH]]
uint test_firstbithigh_int(int p0) {
return firstbithigh(p0);
}
// CHECK-LABEL: test_firstbithigh_int2
-// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitshigh.v2i32
+// CHECK: [[FBH:%.*]] = call <2 x i32> @llvm.[[TARGET]].firstbitshigh.v2i32
+// CHECK: sub <2 x i32> splat (i32 31), [[FBH]]
uint2 test_firstbithigh_int2(int2 p0) {
return firstbithigh(p0);
}
// CHECK-LABEL: test_firstbithigh_int3
-// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitshigh.v3i32
+// CHECK: [[FBH:%.*]] = call <3 x i32> @llvm.[[TARGET]].firstbitshigh.v3i32
+// CHECK: sub <3 x i32> splat (i32 31), [[FBH]]
uint3 test_firstbithigh_int3(int3 p0) {
return firstbithigh(p0);
}
// CHECK-LABEL: test_firstbithigh_int4
-// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitshigh.v4i32
+// CHECK: [[FBH:%.*]] = call <4 x i32> @llvm.[[TARGET]].firstbitshigh.v4i32
+// CHECK: sub <4 x i32> splat (i32 31), [[FBH]]
uint4 test_firstbithigh_int4(int4 p0) {
return firstbithigh(p0);
}
// CHECK-LABEL: test_firstbithigh_long
-// CHECK: call i32 @llvm.[[TARGET]].firstbitshigh.i64
+// CHECK: [[FBH:%.*]] = call i32 @llvm.[[TARGET]].firstbitshigh.i64
+// CHECK: sub i32 63, [[FBH]]
uint test_firstbithigh_long(int64_t p0) {
return firstbithigh(p0);
}
// CHECK-LABEL: test_firstbithigh_long2
-// CHECK: call <2 x i32> @llvm.[[TARGET]].firstbitshigh.v2i64
+// CHECK: [[FBH:%.*]] = call <2 x i32> @llvm.[[TARGET]].firstbitshigh.v2i64
+// CHECK: sub <2 x i32> splat (i32 63), [[FBH]]
uint2 test_firstbithigh_long2(int64_t2 p0) {
return firstbithigh(p0);
}
// CHECK-LABEL: test_firstbithigh_long3
-// CHECK: call <3 x i32> @llvm.[[TARGET]].firstbitshigh.v3i64
+// CHECK: [[FBH:%.*]] = call <3 x i32> @llvm.[[TARGET]].firstbitshigh.v3i64
+// CHECK: sub <3 x i32> splat (i32 63), [[FBH]]
uint3 test_firstbithigh_long3(int64_t3 p0) {
return firstbithigh(p0);
}
// CHECK-LABEL: test_firstbithigh_long4
-// CHECK: call <4 x i32> @llvm.[[TARGET]].firstbitshigh.v4i64
+// CHECK: [[FBH:%.*]] = call <4 x i32> @llvm.[[TARGET]].firstbitshigh.v4i64
+// CHECK: sub <4 x i32> splat (i32 63), [[FBH]]
uint4 test_firstbithigh_long4(int64_t4 p0) {
return firstbithigh(p0);
}
// CHECK-LABEL: test_firstbithigh_upcast
// CHECK: [[FBH:%.*]] = call <4 x i32> @llvm.[[TARGET]].firstbituhigh.v4i32(<4 x i32> %{{.*}})
-// CHECK: [[CONV:%.*]] = zext <4 x i32> [[FBH]] to <4 x i64>
+// CHECK: [[SUB:%.*]] = sub <4 x i32> splat (i32 31), [[FBH]]
+// CHECK: [[CONV:%.*]] = zext <4 x i32> [[SUB]] to <4 x i64>
// CHECK: ret <4 x i64> [[CONV]]
uint64_t4 test_firstbithigh_upcast(uint4 p0) {
return firstbithigh(p0);
``````````
</details>
https://github.com/llvm/llvm-project/pull/166419
More information about the cfe-commits
mailing list