[clang] [llvm] [HLSL] Implement the `fwidth` intrinsic for DXIL and SPIR-V target (PR #161378)
Alexander Johnston via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 1 16:10:24 PDT 2025
https://github.com/Alexander-Johnston updated https://github.com/llvm/llvm-project/pull/161378
>From a1419718a44fa4c65324477443f218e5382ed384 Mon Sep 17 00:00:00 2001
From: Alexander Johnston <alexander.johnston at amd.com>
Date: Mon, 29 Sep 2025 19:54:02 +0100
Subject: [PATCH] [HLSL] Implement the `fwidth` intrinsic
Closes #99120
---
clang/include/clang/Basic/Builtins.td | 12 +++++
.../clang/Basic/BuiltinsSPIRVCommon.td | 1 +
clang/lib/CodeGen/CGHLSLBuiltins.cpp | 18 ++++++++
clang/lib/CodeGen/TargetBuiltins/SPIR.cpp | 5 +++
.../lib/Headers/hlsl/hlsl_intrinsic_helpers.h | 25 +++++++++++
clang/lib/Headers/hlsl/hlsl_intrinsics.h | 25 +++++++++++
clang/lib/Sema/SemaHLSL.cpp | 4 +-
clang/lib/Sema/SemaSPIRV.cpp | 18 ++++++++
clang/test/CodeGenSPIRV/Builtins/fwidth.c | 41 +++++++++++++++++
clang/test/SemaSPIRV/BuiltIns/fwidth-errors.c | 24 ++++++++++
llvm/include/llvm/IR/IntrinsicsDirectX.td | 2 +
llvm/include/llvm/IR/IntrinsicsSPIRV.td | 1 +
llvm/lib/Target/DirectX/DXIL.td | 18 ++++++++
.../DirectX/DirectXTargetTransformInfo.cpp | 2 +
.../Target/SPIRV/SPIRVInstructionSelector.cpp | 15 +++++++
llvm/test/CodeGen/DirectX/deriv_coarse_x.ll | 43 ++++++++++++++++++
.../CodeGen/DirectX/deriv_coarse_x_error.ll | 15 +++++++
llvm/test/CodeGen/DirectX/deriv_coarse_y.ll | 43 ++++++++++++++++++
.../CodeGen/DirectX/deriv_coarse_y_error.ll | 15 +++++++
.../CodeGen/SPIRV/hlsl-intrinsics/fwidth.ll | 44 +++++++++++++++++++
20 files changed, 370 insertions(+), 1 deletion(-)
create mode 100644 clang/test/CodeGenSPIRV/Builtins/fwidth.c
create mode 100644 clang/test/SemaSPIRV/BuiltIns/fwidth-errors.c
create mode 100644 llvm/test/CodeGen/DirectX/deriv_coarse_x.ll
create mode 100644 llvm/test/CodeGen/DirectX/deriv_coarse_x_error.ll
create mode 100644 llvm/test/CodeGen/DirectX/deriv_coarse_y.ll
create mode 100644 llvm/test/CodeGen/DirectX/deriv_coarse_y_error.ll
create mode 100644 llvm/test/CodeGen/SPIRV/hlsl-intrinsics/fwidth.ll
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index 468121f7d20ab..71fe555d6f689 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -5204,6 +5204,18 @@ def HLSLGetSpirvSpecConstant : LangBuiltin<"HLSL_LANG">, HLSLScalarTemplate {
let Prototype = "T(unsigned int, T)";
}
+def HLSLDerivCoarseX: LangBuiltin<"HLSL_LANG"> {
+ let Spellings = ["__builtin_hlsl_elementwise_deriv_coarse_x"];
+ let Attributes = [NoThrow, Const, CustomTypeChecking];
+ let Prototype = "void(...)";
+}
+
+def HLSLDerivCoarseY: LangBuiltin<"HLSL_LANG"> {
+ let Spellings = ["__builtin_hlsl_elementwise_deriv_coarse_y"];
+ let Attributes = [NoThrow, Const, CustomTypeChecking];
+ let Prototype = "void(...)";
+}
+
// Builtins for XRay.
def XRayCustomEvent : Builtin {
let Spellings = ["__xray_customevent"];
diff --git a/clang/include/clang/Basic/BuiltinsSPIRVCommon.td b/clang/include/clang/Basic/BuiltinsSPIRVCommon.td
index d2ef6f99a0502..95f73cf4effbc 100644
--- a/clang/include/clang/Basic/BuiltinsSPIRVCommon.td
+++ b/clang/include/clang/Basic/BuiltinsSPIRVCommon.td
@@ -21,3 +21,4 @@ def subgroup_local_invocation_id : SPIRVBuiltin<"uint32_t()", [NoThrow, Const]>;
def distance : SPIRVBuiltin<"void(...)", [NoThrow, Const]>;
def length : SPIRVBuiltin<"void(...)", [NoThrow, Const]>;
def smoothstep : SPIRVBuiltin<"void(...)", [NoThrow, Const, CustomTypeChecking]>;
+def fwidth : SPIRVBuiltin<"void(...)", [NoThrow, Const, CustomTypeChecking]>;
diff --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
index 6c0fc8d7f07be..5ed77adbb1d16 100644
--- a/clang/lib/CodeGen/CGHLSLBuiltins.cpp
+++ b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
@@ -532,6 +532,24 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
/*ReturnType=*/Op0->getType(), CGM.getHLSLRuntime().getFracIntrinsic(),
ArrayRef<Value *>{Op0}, nullptr, "hlsl.frac");
}
+ case Builtin::BI__builtin_hlsl_elementwise_deriv_coarse_x: {
+ Value *Op0 = EmitScalarExpr(E->getArg(0));
+ if (!E->getArg(0)->getType()->hasFloatingRepresentation())
+ llvm_unreachable(
+ "deriv coarse x operand must have a float representation");
+ return Builder.CreateIntrinsic(
+ /*ReturnType=*/Op0->getType(), llvm::Intrinsic::dx_deriv_coarse_x,
+ ArrayRef<Value *>{Op0}, nullptr, "hlsl.deriv.coarse.x");
+ }
+ case Builtin::BI__builtin_hlsl_elementwise_deriv_coarse_y: {
+ Value *Op0 = EmitScalarExpr(E->getArg(0));
+ if (!E->getArg(0)->getType()->hasFloatingRepresentation())
+ llvm_unreachable(
+ "deriv coarse x operand must have a float representation");
+ return Builder.CreateIntrinsic(
+ /*ReturnType=*/Op0->getType(), llvm::Intrinsic::dx_deriv_coarse_y,
+ ArrayRef<Value *>{Op0}, nullptr, "hlsl.deriv.coarse.y");
+ }
case Builtin::BI__builtin_hlsl_elementwise_isinf: {
Value *Op0 = EmitScalarExpr(E->getArg(0));
llvm::Type *Xty = Op0->getType();
diff --git a/clang/lib/CodeGen/TargetBuiltins/SPIR.cpp b/clang/lib/CodeGen/TargetBuiltins/SPIR.cpp
index 243aad8bf7083..43b05a128e876 100644
--- a/clang/lib/CodeGen/TargetBuiltins/SPIR.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/SPIR.cpp
@@ -151,6 +151,11 @@ Value *CodeGenFunction::EmitSPIRVBuiltinExpr(unsigned BuiltinID,
Intrinsic::spv_global_offset,
ArrayRef<Value *>{EmitScalarExpr(E->getArg(0))}, nullptr,
"spv.global.offset");
+ case SPIRV::BI__builtin_spirv_fwidth:
+ return Builder.CreateIntrinsic(
+ /*ReturnType=*/getTypes().ConvertType(E->getType()),
+ Intrinsic::spv_fwidth, ArrayRef<Value *>{EmitScalarExpr(E->getArg(0))},
+ nullptr, "spv.fwidth");
}
return nullptr;
}
diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h b/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h
index c877234479ad1..01f32596ad554 100644
--- a/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h
+++ b/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h
@@ -148,6 +148,31 @@ template <typename T> constexpr T ldexp_impl(T X, T Exp) {
return exp2(Exp) * X;
}
+template <typename T> constexpr T fwidth_impl(T input) {
+#if (__has_builtin(__builtin_spirv_fwidth))
+ return __builtin_spirv_fwidth(input);
+#else
+ T derivCoarseX = __builtin_hlsl_elementwise_deriv_coarse_x(input);
+ derivCoarseX = abs(derivCoarseX);
+ T derivCoarseY = __builtin_hlsl_elementwise_deriv_coarse_y(input);
+ derivCoarseY = abs(derivCoarseY);
+ return derivCoarseX + derivCoarseY;
+#endif
+}
+
+template <typename T, int N>
+constexpr vector<T, N> fwidth_vec_impl(vector<T, N> input) {
+#if (__has_builtin(__builtin_spirv_fwidth))
+ return __builtin_spirv_fwidth(input);
+#else
+ vector<T, N> derivCoarseX = __builtin_hlsl_elementwise_deriv_coarse_x(input);
+ derivCoarseX = abs(derivCoarseX);
+ vector<T, N> derivCoarseY = __builtin_hlsl_elementwise_deriv_coarse_y(input);
+ derivCoarseY = abs(derivCoarseY);
+ return derivCoarseX + derivCoarseY;
+#endif
+}
+
} // namespace __detail
} // namespace hlsl
diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
index 5ba5bfb9abde0..1e01828fd3ba1 100644
--- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h
+++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
@@ -605,5 +605,30 @@ smoothstep(__detail::HLSL_FIXED_VECTOR<float, N> Min,
return __detail::smoothstep_vec_impl(Min, Max, X);
}
+//===----------------------------------------------------------------------===//
+// fwidth builtin
+//===----------------------------------------------------------------------===//
+
+/// \fn T fwidth(T x)
+/// \brief Computes the sum of the absolute values of the partial derivatives
+/// with regard to the x and y screen space coordinates.
+/// \param x [in] The floating-point scalar or vector to process.
+///
+/// The return value is a floating-point scalar or vector where each element
+/// holds the computation of the matching element in the input.
+
+template <typename T>
+const inline __detail::enable_if_t<
+ __detail::is_arithmetic<T>::Value && __detail::is_same<float, T>::value, T>
+fwidth(T input) {
+ return __detail::fwidth_impl(input);
+}
+
+template <int N>
+const inline __detail::HLSL_FIXED_VECTOR<float, N>
+fwidth(__detail::HLSL_FIXED_VECTOR<float, N> input) {
+ return __detail::fwidth_vec_impl(input);
+}
+
} // namespace hlsl
#endif //_HLSL_HLSL_INTRINSICS_H_
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index 940d510b4cc02..fe621d62988fe 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -3080,7 +3080,9 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
case Builtin::BI__builtin_hlsl_elementwise_degrees:
case Builtin::BI__builtin_hlsl_elementwise_radians:
case Builtin::BI__builtin_hlsl_elementwise_rsqrt:
- case Builtin::BI__builtin_hlsl_elementwise_frac: {
+ case Builtin::BI__builtin_hlsl_elementwise_frac:
+ case Builtin::BI__builtin_hlsl_elementwise_deriv_coarse_x:
+ case Builtin::BI__builtin_hlsl_elementwise_deriv_coarse_y: {
if (SemaRef.checkArgCount(TheCall, 1))
return true;
if (CheckAllArgTypesAreCorrect(&SemaRef, TheCall,
diff --git a/clang/lib/Sema/SemaSPIRV.cpp b/clang/lib/Sema/SemaSPIRV.cpp
index c8ea0d09c4081..0e78cff9c1774 100644
--- a/clang/lib/Sema/SemaSPIRV.cpp
+++ b/clang/lib/Sema/SemaSPIRV.cpp
@@ -360,6 +360,24 @@ bool SemaSPIRV::CheckSPIRVBuiltinFunctionCall(const TargetInfo &TI,
case SPIRV::BI__builtin_spirv_generic_cast_to_ptr_explicit: {
return checkGenericCastToPtr(SemaRef, TheCall);
}
+ case SPIRV::BI__builtin_spirv_fwidth: {
+ if (SemaRef.checkArgCount(TheCall, 1))
+ return true;
+
+ // Check if first argument has floating representation
+ ExprResult A = TheCall->getArg(0);
+ QualType ArgTyA = A.get()->getType();
+ if (!ArgTyA->hasFloatingRepresentation()) {
+ SemaRef.Diag(A.get()->getBeginLoc(), diag::err_builtin_invalid_arg_type)
+ << /* ordinal */ 1 << /* scalar or vector */ 5 << /* no int */ 0
+ << /* fp */ 1 << ArgTyA;
+ return true;
+ }
+
+ QualType RetTy = ArgTyA;
+ TheCall->setType(RetTy);
+ break;
+ }
}
return false;
}
diff --git a/clang/test/CodeGenSPIRV/Builtins/fwidth.c b/clang/test/CodeGenSPIRV/Builtins/fwidth.c
new file mode 100644
index 0000000000000..027b80500904d
--- /dev/null
+++ b/clang/test/CodeGenSPIRV/Builtins/fwidth.c
@@ -0,0 +1,41 @@
+// RUN: %clang_cc1 -O1 -triple spirv-pc-vulkan-compute %s -emit-llvm -o - | FileCheck %s
+
+typedef _Float16 half;
+typedef half half2 __attribute__((ext_vector_type(2)));
+typedef half half3 __attribute__((ext_vector_type(3)));
+typedef half half4 __attribute__((ext_vector_type(4)));
+typedef float float2 __attribute__((ext_vector_type(2)));
+typedef float float3 __attribute__((ext_vector_type(3)));
+typedef float float4 __attribute__((ext_vector_type(4)));
+
+// CHECK: [[fwidth0:%.*]] = tail call half @llvm.spv.fwidth.f16(half {{%.*}})
+// CHECK: ret half [[fwidth0]]
+half test_fwidth_half(half X) { return __builtin_spirv_fwidth(X); }
+
+// CHECK: [[fwidth0:%.*]] = tail call <2 x half> @llvm.spv.fwidth.v2f16(<2 x half> {{%.*}})
+// CHECK: ret <2 x half> [[fwidth0]]
+half2 test_fwidth_half2(half2 X) { return __builtin_spirv_fwidth(X); }
+
+// CHECK: [[fwidth0:%.*]] = tail call <3 x half> @llvm.spv.fwidth.v3f16(<3 x half> {{%.*}})
+// CHECK: ret <3 x half> [[fwidth0]]
+half3 test_fwidth_half3(half3 X) { return __builtin_spirv_fwidth(X); }
+
+// CHECK: [[fwidth0:%.*]] = tail call <4 x half> @llvm.spv.fwidth.v4f16(<4 x half> {{%.*}})
+// CHECK: ret <4 x half> [[fwidth0]]
+half4 test_fwidth_half4(half4 X) { return __builtin_spirv_fwidth(X); }
+
+// CHECK: [[fwidth0:%.*]] = tail call float @llvm.spv.fwidth.f32(float {{%.*}})
+// CHECK: ret float [[fwidth0]]
+float test_fwidth_float(float X) { return __builtin_spirv_fwidth(X); }
+
+// CHECK: [[fwidth1:%.*]] = tail call <2 x float> @llvm.spv.fwidth.v2f32(<2 x float> {{%.*}})
+// CHECK: ret <2 x float> [[fwidth1]]
+float2 test_fwidth_float2(float2 X) { return __builtin_spirv_fwidth(X); }
+
+// CHECK: [[fwidth2:%.*]] = tail call <3 x float> @llvm.spv.fwidth.v3f32(<3 x float> {{%.*}})
+// CHECK: ret <3 x float> [[fwidth2]]
+float3 test_fwidth_float3(float3 X) { return __builtin_spirv_fwidth(X); }
+
+// CHECK: [[fwidth3:%.*]] = tail call <4 x float> @llvm.spv.fwidth.v4f32(<4 x float> {{%.*}})
+// CHECK: ret <4 x float> [[fwidth3]]
+float4 test_fwidth_float4(float4 X) { return __builtin_spirv_fwidth(X); }
diff --git a/clang/test/SemaSPIRV/BuiltIns/fwidth-errors.c b/clang/test/SemaSPIRV/BuiltIns/fwidth-errors.c
new file mode 100644
index 0000000000000..44cdd819e4332
--- /dev/null
+++ b/clang/test/SemaSPIRV/BuiltIns/fwidth-errors.c
@@ -0,0 +1,24 @@
+// RUN: %clang_cc1 %s -triple spirv-pc-vulkan-compute -verify
+
+typedef float float2 __attribute__((ext_vector_type(2)));
+
+void test_too_few_arg()
+{
+ return __builtin_spirv_fwidth();
+ // expected-error at -1 {{too few arguments to function call, expected 1, have 0}}
+}
+
+float test_too_many_arg(float p0) {
+ return __builtin_spirv_fwidth(p0, p0);
+ // expected-error at -1 {{too many arguments to function call, expected 1, have 2}}
+}
+
+float test_int_scalar_inputs(int p0) {
+ return __builtin_spirv_fwidth(p0);
+ // expected-error at -1 {{1st argument must be a scalar or vector of floating-point types (was 'int')}}
+}
+
+float test_mismatched_return(float2 p0) {
+ return __builtin_spirv_fwidth(p0);
+ // expected-error at -1 {{returning 'float2' (vector of 2 'float' values) from a function with incompatible result type 'float'}}
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td
index 570d6bc35cbd0..1a4a0fc2364bd 100644
--- a/llvm/include/llvm/IR/IntrinsicsDirectX.td
+++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -162,6 +162,8 @@ def int_dx_splitdouble : DefaultAttrsIntrinsic<[llvm_anyint_ty, LLVMMatchType<0>
[LLVMScalarOrSameVectorWidth<0, llvm_double_ty>], [IntrNoMem]>;
def int_dx_radians : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
def int_dx_discard : DefaultAttrsIntrinsic<[], [llvm_i1_ty], []>;
+def int_dx_deriv_coarse_x : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
+def int_dx_deriv_coarse_y : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
def int_dx_firstbituhigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;
def int_dx_firstbitshigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;
def int_dx_firstbitlow : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>;
diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
index 823c491e1bfee..235568f4b20eb 100644
--- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td
+++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
@@ -132,6 +132,7 @@ def int_spv_rsqrt : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty]
def int_spv_group_memory_barrier_with_group_sync
: DefaultAttrsIntrinsic<[], [], [IntrConvergent]>;
def int_spv_discard : DefaultAttrsIntrinsic<[], [], []>;
+ def int_spv_fwidth : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
def int_spv_uclamp : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
def int_spv_sclamp : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
def int_spv_nclamp : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index 228114c5c24b2..02360cdc859fc 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -922,6 +922,24 @@ def Discard : DXILOp<82, discard> {
let stages = [Stages<DXIL1_0, [pixel]>];
}
+def DerivCoarseX : DXILOp<83, unary> {
+ let Doc = "computes the rate of change per stamp in x direction";
+ let intrinsics = [IntrinSelect<int_dx_deriv_coarse_x>];
+ let arguments = [OverloadTy];
+ let result = OverloadTy;
+ let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
+ let stages = [Stages<DXIL1_0, [library, pixel, compute, amplification, mesh, node]>];
+}
+
+def DerivCoarseY : DXILOp<84, unary> {
+ let Doc = "computes the rate of change per stamp in y direction";
+ let intrinsics = [IntrinSelect<int_dx_deriv_coarse_y>];
+ let arguments = [OverloadTy];
+ let result = OverloadTy;
+ let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy]>];
+ let stages = [Stages<DXIL1_0, [library, pixel, compute, amplification, mesh, node]>];
+}
+
def ThreadId : DXILOp<93, threadId> {
let Doc = "Reads the thread ID";
let intrinsics = [IntrinSelect<int_dx_thread_id>];
diff --git a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp
index 68fd3e0bc74c7..4854a3e676918 100644
--- a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp
+++ b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp
@@ -48,6 +48,8 @@ bool DirectXTTIImpl::isTargetIntrinsicTriviallyScalarizable(
case Intrinsic::dx_firstbitshigh:
case Intrinsic::dx_firstbituhigh:
case Intrinsic::dx_frac:
+ case Intrinsic::dx_deriv_coarse_x:
+ case Intrinsic::dx_deriv_coarse_y:
case Intrinsic::dx_isinf:
case Intrinsic::dx_isnan:
case Intrinsic::dx_rsqrt:
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index 1aadd9df189a8..1293c4eb4a3a4 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -179,6 +179,9 @@ class SPIRVInstructionSelector : public InstructionSelector {
bool selectSplatVector(Register ResVReg, const SPIRVType *ResType,
MachineInstr &I) const;
+ bool selectFwidth(Register ResVReg, const SPIRVType *ResType,
+ MachineInstr &I) const;
+
bool selectCmp(Register ResVReg, const SPIRVType *ResType,
unsigned comparisonOpcode, MachineInstr &I) const;
bool selectDiscard(Register ResVReg, const SPIRVType *ResType,
@@ -2615,6 +2618,15 @@ bool SPIRVInstructionSelector::selectDiscard(Register ResVReg,
.constrainAllUses(TII, TRI, RBI);
}
+bool SPIRVInstructionSelector::selectFwidth(Register ResVReg,
+ const SPIRVType *ResType,
+ MachineInstr &I) const {
+ return BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpFwidth))
+ .addDef(ResVReg)
+ .addUse(GR.getSPIRVTypeID(ResType))
+ .addUse(I.getOperand(2).getReg());
+}
+
bool SPIRVInstructionSelector::selectCmp(Register ResVReg,
const SPIRVType *ResType,
unsigned CmpOpc,
@@ -3451,6 +3463,9 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg,
case Intrinsic::spv_discard: {
return selectDiscard(ResVReg, ResType, I);
}
+ case Intrinsic::spv_fwidth: {
+ return selectFwidth(ResVReg, ResType, I);
+ }
case Intrinsic::modf: {
return selectModf(ResVReg, ResType, I);
}
diff --git a/llvm/test/CodeGen/DirectX/deriv_coarse_x.ll b/llvm/test/CodeGen/DirectX/deriv_coarse_x.ll
new file mode 100644
index 0000000000000..49e584c5c158e
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/deriv_coarse_x.ll
@@ -0,0 +1,43 @@
+; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+
+; Make sure dxil operation function calls for fwidth are generated for float, half vec, float, an32float v
+; Make sure dxil operation function calls for fwidth are generated for float, half vec, flv4oat, an32float vec
+
+
+define noundef half @deriv_coarse_x_half(half noundef %a) {
+; CHECK: call half @dx.op.unary.f16(i32 83, half %{{.*}})
+entry:
+ %dx.deriv.coarse.x = call half @llvm.dx.deriv.coarse.x.f16(half %a)
+ ret half %dx.deriv.coarse.x
+}
+
+define noundef float @deriv_coarse_x_float(float noundef %a) {
+; CHECK: call float @dx.op.unary.f32(i32 83, float %{{.*}})
+entry:
+ %dx.deriv.coarse.x = call float @llvm.dx.deriv.coarse.x.f32(float %a)
+ ret float %dx.deriv.coarse.x
+}
+
+define noundef <4 x float> @deriv_coarse_x_float4(<4 x float> noundef %a) {
+; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
+; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 83, float [[ee0]])
+; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
+; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 83, float [[ee1]])
+; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
+; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 83, float [[ee2]])
+; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
+; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 83, float [[ee3]])
+; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
+; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
+; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
+; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3
+; CHECK: ret <4 x float> %{{.*}}
+entry:
+ %dx.deriv.coarse.x = call <4 x float> @llvm.dx.deriv.coarse.x.v4f32(<4 x float> %a)
+ ret <4 x float> %dx.deriv.coarse.x
+}
+
+declare half @llvm.dx.deriv.coarse.x.f16(half)
+declare float @llvm.dx.deriv.coarse.x.f32(float)
+declare <4 x float> @llvm.dx.deriv.coarse.x.v4f32(<4 x float>)
+
diff --git a/llvm/test/CodeGen/DirectX/deriv_coarse_x_error.ll b/llvm/test/CodeGen/DirectX/deriv_coarse_x_error.ll
new file mode 100644
index 0000000000000..eab495c5f7c6a
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/deriv_coarse_x_error.ll
@@ -0,0 +1,15 @@
+; RUN: not opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s 2>&1 | FileCheck %s
+
+; DXIL operation deriv_coarse_x does not support double overload type
+; CHECK: in function deriv_coarse_x
+; CHECK-SAME: Cannot create DerivCoarseX operation: Invalid overload type
+
+; Function Attrs: noinline nounwind optnone
+define noundef double @deriv_coarse_x_double(double noundef %a) #0 {
+entry:
+ %a.addr = alloca double, align 8
+ store double %a, ptr %a.addr, align 8
+ %0 = load double, ptr %a.addr, align 8
+ %dx.deriv_coarse_x = call double @llvm.dx.deriv.coarse.x.f64(double %0)
+ ret double %dx.deriv_coarse_x
+}
diff --git a/llvm/test/CodeGen/DirectX/deriv_coarse_y.ll b/llvm/test/CodeGen/DirectX/deriv_coarse_y.ll
new file mode 100644
index 0000000000000..76671991cf46b
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/deriv_coarse_y.ll
@@ -0,0 +1,43 @@
+; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+
+; Make sure dxil operation function calls for fwidth are generated for float, half vec, float, an32float v
+; Make sure dxil operation function calls for fwidth are generated for float, half vec, flv4oat, an32float vec
+
+
+define noundef half @deriv_coarse_y_half(half noundef %a) {
+; CHECK: call half @dx.op.unary.f16(i32 84, half %{{.*}})
+entry:
+ %dx.deriv.coarse.y = call half @llvm.dx.deriv.coarse.y.f16(half %a)
+ ret half %dx.deriv.coarse.y
+}
+
+define noundef float @deriv_coarse_y_float(float noundef %a) {
+; CHECK: call float @dx.op.unary.f32(i32 84, float %{{.*}})
+entry:
+ %dx.deriv.coarse.y = call float @llvm.dx.deriv.coarse.y.f32(float %a)
+ ret float %dx.deriv.coarse.y
+}
+
+define noundef <4 x float> @deriv_coarse_y_float4(<4 x float> noundef %a) {
+; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
+; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 84, float [[ee0]])
+; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
+; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 84, float [[ee1]])
+; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
+; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 84, float [[ee2]])
+; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
+; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 84, float [[ee3]])
+; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
+; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
+; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
+; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3
+; CHECK: ret <4 x float> %{{.*}}
+entry:
+ %dx.deriv.coarse.y = call <4 x float> @llvm.dx.deriv.coarse.y.v4f32(<4 x float> %a)
+ ret <4 x float> %dx.deriv.coarse.y
+}
+
+declare half @llvm.dx.deriv.coarse.y.f16(half)
+declare float @llvm.dx.deriv.coarse.y.f32(float)
+declare <4 x float> @llvm.dx.deriv.coarse.y.v4f32(<4 x float>)
+
diff --git a/llvm/test/CodeGen/DirectX/deriv_coarse_y_error.ll b/llvm/test/CodeGen/DirectX/deriv_coarse_y_error.ll
new file mode 100644
index 0000000000000..6f48b9e88f5b9
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/deriv_coarse_y_error.ll
@@ -0,0 +1,15 @@
+; RUN: not opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s 2>&1 | FileCheck %s
+
+; DXIL operation deriv_coarse_y does not support double overload type
+; CHECK: in function deriv_coarse_y
+; CHECK-SAME: Cannot create DerivCoarseY operation: Invalid overload type
+
+; Function Attrs: noinline nounwind optnone
+define noundef double @deriv_coarse_y_double(double noundef %a) #0 {
+entry:
+ %a.addr = alloca double, align 8
+ store double %a, ptr %a.addr, align 8
+ %0 = load double, ptr %a.addr, align 8
+ %dx.deriv_coarse_y = call double @llvm.dx.deriv.coarse.y.f64(double %0)
+ ret double %dx.deriv_coarse_y
+}
diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/fwidth.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/fwidth.ll
new file mode 100644
index 0000000000000..45ebbdda4494c
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/fwidth.ll
@@ -0,0 +1,44 @@
+; RUN: llc -O0 -verify-machineinstrs -mtriple=spirv-unknown-vulkan %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan %s -o - -filetype=obj | spirv-val %}
+
+; CHECK-DAG: %[[#float_32:]] = OpTypeFloat 32
+; CHECK-DAG: %[[#float_16:]] = OpTypeFloat 16
+
+; CHECK-DAG: %[[#vec4_float_32:]] = OpTypeVector %[[#float_32]] 4
+; CHECK-DAG: %[[#vec4_float_16:]] = OpTypeVector %[[#float_16]] 4
+
+define noundef float @fwidth_float(float noundef %a) {
+entry:
+; CHECK: %[[#float_32_arg:]] = OpFunctionParameter %[[#float_32]]
+; CHECK: %[[#]] = OpFwidth %[[#float_32]] %[[#float_32_arg]]
+ %elt.fwidth = call float @llvm.spv.fwidth.f32(float %a)
+ ret float %elt.fwidth
+}
+
+define noundef half @fwidth_half(half noundef %a) {
+entry:
+; CHECK: %[[#float_16_arg:]] = OpFunctionParameter %[[#float_16]]
+; CHECK: %[[#]] = OpFwidth %[[#float_16]] %[[#float_16_arg]]
+ %elt.fwidth = call half @llvm.spv.fwidth.f16(half %a)
+ ret half %elt.fwidth
+}
+
+define noundef <4 x float> @fwidth_float_vector(<4 x float> noundef %a) {
+entry:
+; CHECK: %[[#vec4_float_32_arg:]] = OpFunctionParameter %[[#vec4_float_32]]
+; CHECK: %[[#]] = OpFwidth %[[#vec4_float_32]] %[[#vec4_float_32_arg]]
+ %elt.fwidth = call <4 x float> @llvm.spv.fwidth.v4f32(<4 x float> %a)
+ ret <4 x float> %elt.fwidth
+}
+
+define noundef <4 x half> @fwidth_half_vector(<4 x half> noundef %a) {
+entry:
+; CHECK: %[[#vec4_float_16_arg:]] = OpFunctionParameter %[[#vec4_float_16]]
+; CHECK: %[[#]] = OpFwidth %[[#vec4_float_16]] %[[#vec4_float_16_arg]]
+ %elt.fwidth = call <4 x half> @llvm.spv.fwidth.v4f16(<4 x half> %a)
+ ret <4 x half> %elt.fwidth
+}
+
+declare float @llvm.spv.fwidth.f32(float)
+declare half @llvm.spv.fwidth.f16(half)
+
More information about the llvm-commits
mailing list