[clang] [HLSL] Implement D3DCOLORtoUBYTE4 intrinsic (PR #122202)
Deric Cheung via cfe-commits
cfe-commits at lists.llvm.org
Mon Jan 13 09:39:24 PST 2025
https://github.com/Icohedron updated https://github.com/llvm/llvm-project/pull/122202
>From 5610b225e76b046e911c1a7a0c1e4ccc128d35a1 Mon Sep 17 00:00:00 2001
From: Icohedron <cheung.deric at gmail.com>
Date: Thu, 9 Jan 2025 01:14:52 +0000
Subject: [PATCH 1/3] [HLSL] Implement the D3DCOLORtoUBYTE4 intrinsic
---
clang/lib/Headers/hlsl/hlsl_detail.h | 8 +++++
clang/lib/Headers/hlsl/hlsl_intrinsics.h | 17 +++++++++++
.../builtins/D3DCOLORtoUBYTE4.hlsl | 12 ++++++++
.../BuiltIns/D3DCOLORtoUBYTE4-errors.hlsl | 29 +++++++++++++++++++
4 files changed, 66 insertions(+)
create mode 100644 clang/test/CodeGenHLSL/builtins/D3DCOLORtoUBYTE4.hlsl
create mode 100644 clang/test/SemaHLSL/BuiltIns/D3DCOLORtoUBYTE4-errors.hlsl
diff --git a/clang/lib/Headers/hlsl/hlsl_detail.h b/clang/lib/Headers/hlsl/hlsl_detail.h
index 8d5fd941331531..470fa4214a12f8 100644
--- a/clang/lib/Headers/hlsl/hlsl_detail.h
+++ b/clang/lib/Headers/hlsl/hlsl_detail.h
@@ -33,6 +33,14 @@ constexpr enable_if_t<sizeof(U) == sizeof(T), U> bit_cast(T F) {
return __builtin_bit_cast(U, F);
}
+constexpr vector<uint, 4> d3d_color_to_ubyte4(vector<float, 4> V) {
+ // Use the same scaling factor used by FXC (i.e., 255.001953)
+ // Excerpt from stackoverflow discussion:
+ // "Built-in rounding, necessary because of truncation. 0.001953 * 256 = 0.5"
+ // https://stackoverflow.com/questions/52103720/why-does-d3dcolortoubyte4-multiplies-components-by-255-001953f
+ return V.zyxw * 255.001953f;
+}
+
} // namespace __detail
} // namespace hlsl
#endif //_HLSL_HLSL_DETAILS_H_
diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
index b745997f1d5a2b..e44403c6c802e0 100644
--- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h
+++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
@@ -1857,6 +1857,23 @@ half3 cross(half3, half3);
_HLSL_BUILTIN_ALIAS(__builtin_hlsl_cross)
float3 cross(float3, float3);
+//===----------------------------------------------------------------------===//
+// D3DCOLORtoUBYTE4 builtins
+//===----------------------------------------------------------------------===//
+
+/// \fn T D3DCOLORtoUBYTE4(T x)
+/// \brief Converts a floating-point, 4D vector set by a D3DCOLOR to a UBYTE4.
+/// \param V [in] The floating-point vector4 to convert.
+///
+/// The return value is the UBYTE4 representation of the \a V parameter.
+///
+/// This function swizzles and scales components of the \a V parameter. Use this
+/// function to compensate for the lack of UBYTE4 support in some hardware.
+
+constexpr vector<uint, 4> D3DCOLORtoUBYTE4(vector<float, 4> V) {
+ return __detail::d3d_color_to_ubyte4(V);
+}
+
//===----------------------------------------------------------------------===//
// rcp builtins
//===----------------------------------------------------------------------===//
diff --git a/clang/test/CodeGenHLSL/builtins/D3DCOLORtoUBYTE4.hlsl b/clang/test/CodeGenHLSL/builtins/D3DCOLORtoUBYTE4.hlsl
new file mode 100644
index 00000000000000..7021de7192b5e5
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/D3DCOLORtoUBYTE4.hlsl
@@ -0,0 +1,12 @@
+// RUN: %clang_cc1 -finclude-default-header -triple \
+// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \
+// RUN: -emit-llvm -O1 -o - | FileCheck %s --check-prefixes=CHECK
+
+// CHECK-LABEL: D3DCOLORtoUBYTE4
+int4 test_D3DCOLORtoUBYTE4(float4 p1) {
+ // CHECK: %[[SCALED:.*]] = fmul [[FMFLAGS:.*]]<4 x float> %{{.*}}, splat (float 0x406FE01000000000)
+ // CHECK: %[[CONVERTED:.*]] = fptoui <4 x float> %[[SCALED]] to <4 x i32>
+ // CHECK: %[[SHUFFLED:.*]] = shufflevector <4 x i32> %{{.*}}, <4 x i32> poison, <4 x i32> <i32 2, i32 1, i32 0, i32 3>
+ // CHECK: ret <4 x i32> %[[SHUFFLED]]
+ return D3DCOLORtoUBYTE4(p1);
+}
diff --git a/clang/test/SemaHLSL/BuiltIns/D3DCOLORtoUBYTE4-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/D3DCOLORtoUBYTE4-errors.hlsl
new file mode 100644
index 00000000000000..e9ba851007c941
--- /dev/null
+++ b/clang/test/SemaHLSL/BuiltIns/D3DCOLORtoUBYTE4-errors.hlsl
@@ -0,0 +1,29 @@
+// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -emit-llvm-only -disable-llvm-passes -verify
+
+int4 test_too_few_arg() {
+ return D3DCOLORtoUBYTE4();
+ // expected-error@-1 {{no matching function for call to 'D3DCOLORtoUBYTE4'}}
+ // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function not viable: requires single argument 'V', but no arguments were provided}}
+}
+
+int4 test_too_many_arg(float4 v) {
+ return D3DCOLORtoUBYTE4(v, v);
+ // expected-error@-1 {{no matching function for call to 'D3DCOLORtoUBYTE4'}}
+ // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function not viable: requires single argument 'V', but 2 arguments were provided}}
+}
+
+int4 float2_arg(float2 v) {
+ return D3DCOLORtoUBYTE4(v);
+ // expected-error@-1 {{no matching function for call to 'D3DCOLORtoUBYTE4'}}
+ // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function not viable: no known conversion from 'vector<[...], 2>' to 'vector<[...], 4>' for 1st argument}}
+}
+
+struct S {
+ float4 f;
+};
+
+int4 struct_arg(S v) {
+ return D3DCOLORtoUBYTE4(v);
+ // expected-error@-1 {{no matching function for call to 'D3DCOLORtoUBYTE4'}}
+ // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function not viable: no known conversion from 'S' to 'vector<float, 4>' (vector of 4 'float' values) for 1st argument}}
+}
>From e2c4abd6d15789abba67671dc654c7f42d2a917e Mon Sep 17 00:00:00 2001
From: Icohedron <cheung.deric at gmail.com>
Date: Fri, 10 Jan 2025 18:50:54 +0000
Subject: [PATCH 2/3] Address some PR comments
---
clang/lib/Headers/hlsl/hlsl_detail.h | 2 +-
clang/lib/Headers/hlsl/hlsl_intrinsics.h | 4 ++--
clang/test/CodeGenHLSL/builtins/D3DCOLORtoUBYTE4.hlsl | 10 +++++-----
3 files changed, 8 insertions(+), 8 deletions(-)
diff --git a/clang/lib/Headers/hlsl/hlsl_detail.h b/clang/lib/Headers/hlsl/hlsl_detail.h
index 470fa4214a12f8..f7677ac6c64cc2 100644
--- a/clang/lib/Headers/hlsl/hlsl_detail.h
+++ b/clang/lib/Headers/hlsl/hlsl_detail.h
@@ -33,7 +33,7 @@ constexpr enable_if_t<sizeof(U) == sizeof(T), U> bit_cast(T F) {
return __builtin_bit_cast(U, F);
}
-constexpr vector<uint, 4> d3d_color_to_ubyte4(vector<float, 4> V) {
+constexpr vector<uint, 4> d3d_color_to_ubyte4_impl(vector<float, 4> V) {
// Use the same scaling factor used by FXC (i.e., 255.001953)
// Excerpt from stackoverflow discussion:
// "Built-in rounding, necessary because of truncation. 0.001953 * 256 = 0.5"
diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
index e44403c6c802e0..c5d9024cb982b4 100644
--- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h
+++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
@@ -1858,7 +1858,7 @@ _HLSL_BUILTIN_ALIAS(__builtin_hlsl_cross)
float3 cross(float3, float3);
//===----------------------------------------------------------------------===//
-// D3DCOLORtoUBYTE4 builtins
+// D3DCOLORtoUBYTE4 builtin
//===----------------------------------------------------------------------===//
/// \fn T D3DCOLORtoUBYTE4(T x)
@@ -1871,7 +1871,7 @@ float3 cross(float3, float3);
/// function to compensate for the lack of UBYTE4 support in some hardware.
constexpr vector<uint, 4> D3DCOLORtoUBYTE4(vector<float, 4> V) {
- return __detail::d3d_color_to_ubyte4(V);
+ return __detail::d3d_color_to_ubyte4_impl(V);
}
//===----------------------------------------------------------------------===//
diff --git a/clang/test/CodeGenHLSL/builtins/D3DCOLORtoUBYTE4.hlsl b/clang/test/CodeGenHLSL/builtins/D3DCOLORtoUBYTE4.hlsl
index 7021de7192b5e5..990f0aa910f300 100644
--- a/clang/test/CodeGenHLSL/builtins/D3DCOLORtoUBYTE4.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/D3DCOLORtoUBYTE4.hlsl
@@ -1,12 +1,12 @@
// RUN: %clang_cc1 -finclude-default-header -triple \
-// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \
+// RUN: dxil-pc-shadermodel6.3-library %s \
// RUN: -emit-llvm -O1 -o - | FileCheck %s --check-prefixes=CHECK
// CHECK-LABEL: D3DCOLORtoUBYTE4
int4 test_D3DCOLORtoUBYTE4(float4 p1) {
- // CHECK: %[[SCALED:.*]] = fmul [[FMFLAGS:.*]]<4 x float> %{{.*}}, splat (float 0x406FE01000000000)
- // CHECK: %[[CONVERTED:.*]] = fptoui <4 x float> %[[SCALED]] to <4 x i32>
- // CHECK: %[[SHUFFLED:.*]] = shufflevector <4 x i32> %{{.*}}, <4 x i32> poison, <4 x i32> <i32 2, i32 1, i32 0, i32 3>
- // CHECK: ret <4 x i32> %[[SHUFFLED]]
+ // CHECK: %[[SCALED:.*]] = fmul [[FMFLAGS:.*]][[FLOAT_TYPE:<4 x float>]] %{{.*}}, splat (float 0x406FE01000000000)
+ // CHECK: %[[CONVERTED:.*]] = fptoui [[FLOAT_TYPE]] %[[SCALED]] to [[INT_TYPE:<4 x i32>]]
+ // CHECK: %[[SHUFFLED:.*]] = shufflevector [[INT_TYPE]] %[[CONVERTED]], [[INT_TYPE]] poison, <4 x i32> <i32 2, i32 1, i32 0, i32 3>
+ // CHECK: ret [[INT_TYPE]] %[[SHUFFLED]]
return D3DCOLORtoUBYTE4(p1);
}
>From 6d170647917b822eeddeb4d5dab6024ed6843087 Mon Sep 17 00:00:00 2001
From: Icohedron <cheung.deric at gmail.com>
Date: Mon, 13 Jan 2025 17:15:26 +0000
Subject: [PATCH 3/3] Link to DXC implementation in comment
---
clang/lib/Headers/hlsl/hlsl_detail.h | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/clang/lib/Headers/hlsl/hlsl_detail.h b/clang/lib/Headers/hlsl/hlsl_detail.h
index b90696752c3c2a..3eb4a3dc861e36 100644
--- a/clang/lib/Headers/hlsl/hlsl_detail.h
+++ b/clang/lib/Headers/hlsl/hlsl_detail.h
@@ -42,9 +42,12 @@ constexpr enable_if_t<sizeof(U) == sizeof(T), U> bit_cast(T F) {
}
constexpr vector<uint, 4> d3d_color_to_ubyte4_impl(vector<float, 4> V) {
- // Use the same scaling factor used by FXC (i.e., 255.001953)
- // Excerpt from stackoverflow discussion:
- // "Built-in rounding, necessary because of truncation. 0.001953 * 256 = 0.5"
+ // Use the same scaling factor used by FXC, and DXC for DXIL
+ // (i.e., 255.001953)
+ // https://github.com/microsoft/DirectXShaderCompiler/blob/070d0d5a2beacef9eeb51037a9b04665716fd6f3/lib/HLSL/HLOperationLower.cpp#L666C1-L697C2
+ // The DXC implementation refers to a comment on the following stackoverflow
+ // discussion to justify the scaling factor: "Built-in rounding, necessary
+ // because of truncation. 0.001953 * 256 = 0.5"
// https://stackoverflow.com/questions/52103720/why-does-d3dcolortoubyte4-multiplies-components-by-255-001953f
return V.zyxw * 255.001953f;
}
More information about the cfe-commits
mailing list