[clang] [llvm] [HLSL] implement `clamp` intrinsic (PR #85424)
Farzon Lotfi via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 15 09:26:24 PDT 2024
https://github.com/farzonl created https://github.com/llvm/llvm-project/pull/85424
closes #70071
- `CGBuiltin.cpp` - Add the unsigned/generic clamp intrinsic emitter.
- `IntrinsicsDirectX.td` - Add the `dx.clamp` & `dx.uclamp` intrinsics.
- `DXILIntrinsicExpansion.cpp` - Add the `clamp` instruction expansion while maintaining vector form.
- `SemaChecking.cpp` - Add `clamp` builtin Sema checks.
- `Builtins.td` - Add a `clamp` builtin.
- `hlsl_intrinsics.h` - Add the `clamp` API.
Why implement `clamp` as an instruction expansion for DXIL?
1. SPIR-V has a GLSL `clamp` extension via:
- [FClamp](https://registry.khronos.org/SPIR-V/specs/1.0/GLSL.std.450.html#FClamp)
- [UClamp](https://registry.khronos.org/SPIR-V/specs/1.0/GLSL.std.450.html#UClamp)
- [SClamp](https://registry.khronos.org/SPIR-V/specs/1.0/GLSL.std.450.html#SClamp)
2. Further, `clamp` lowers to `min(max(x, min_range), max_range)`, for which we have float, signed, and unsigned DXIL ops (a sketch of the expansion follows below).
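For illustration, a minimal sketch of the intended lowering for a scalar signed-integer clamp (value names are illustrative, not taken from the tests):

```llvm
; Before DXILIntrinsicExpansion: the generic clamp intrinsic emitted for the builtin.
%r = call i32 @llvm.dx.clamp.i32(i32 %x, i32 %lo, i32 %hi)

; After expansion: max against the lower bound, then min against the upper bound.
%dx.max = call i32 @llvm.smax.i32(i32 %x, i32 %lo)
%dx.min = call i32 @llvm.smin.i32(i32 %dx.max, i32 %hi)
```

Floating-point element types use `maxnum`/`minnum` instead, and `-dxil-op-lower` then maps these generic intrinsics onto the corresponding DXIL binary ops.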
From 681f4bbbc4aba08e285864ded62a7f01e178bf38 Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <farzonlotfi at microsoft.com>
Date: Thu, 14 Mar 2024 15:26:26 -0400
Subject: [PATCH 1/2] [HLSL] Implement the intrinsic
---
clang/include/clang/Basic/Builtins.td | 6 +
clang/lib/CodeGen/CGBuiltin.cpp | 8 ++
clang/lib/Headers/hlsl/hlsl_intrinsics.h | 110 ++++++++++++++
clang/lib/Sema/SemaChecking.cpp | 15 +-
.../CodeGenHLSL/builtins/clamp-builtin.hlsl | 8 ++
clang/test/CodeGenHLSL/builtins/clamp.hlsl | 134 ++++++++++++++++++
.../test/SemaHLSL/BuiltIns/clamp-errors.hlsl | 91 ++++++++++++
llvm/include/llvm/IR/IntrinsicsDirectX.td | 2 +-
llvm/lib/Target/DirectX/DXIL.td | 10 ++
.../Target/DirectX/DXILIntrinsicExpansion.cpp | 48 +++++++
llvm/test/CodeGen/DirectX/clamp.ll | 64 +++++++++
llvm/test/CodeGen/DirectX/fmax.ll | 31 ++++
llvm/test/CodeGen/DirectX/fmin.ll | 31 ++++
llvm/test/CodeGen/DirectX/smax.ll | 31 ++++
llvm/test/CodeGen/DirectX/smin.ll | 31 ++++
llvm/test/CodeGen/DirectX/umax.ll | 29 ++--
llvm/test/CodeGen/DirectX/umin.ll | 31 ++++
17 files changed, 664 insertions(+), 16 deletions(-)
create mode 100644 clang/test/CodeGenHLSL/builtins/clamp-builtin.hlsl
create mode 100644 clang/test/CodeGenHLSL/builtins/clamp.hlsl
create mode 100644 clang/test/SemaHLSL/BuiltIns/clamp-errors.hlsl
create mode 100644 llvm/test/CodeGen/DirectX/clamp.ll
create mode 100644 llvm/test/CodeGen/DirectX/fmax.ll
create mode 100644 llvm/test/CodeGen/DirectX/fmin.ll
create mode 100644 llvm/test/CodeGen/DirectX/smax.ll
create mode 100644 llvm/test/CodeGen/DirectX/smin.ll
create mode 100644 llvm/test/CodeGen/DirectX/umin.ll
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index 58a2d22e7641fc..64599aaee0ced7 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -4554,6 +4554,12 @@ def HLSLWaveActiveCountBits : LangBuiltin<"HLSL_LANG"> {
let Prototype = "unsigned int(bool)";
}
+def HLSLClamp : LangBuiltin<"HLSL_LANG"> {
+ let Spellings = ["__builtin_hlsl_elementwise_clamp"];
+ let Attributes = [NoThrow, Const];
+ let Prototype = "void(...)";
+}
+
def HLSLCreateHandle : LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_create_handle"];
let Attributes = [NoThrow, Const];
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index b09bf563622089..f831694fe9bc23 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -17981,6 +17981,14 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
/*ReturnType=*/llvm::Type::getInt1Ty(getLLVMContext()),
Intrinsic::dx_any, ArrayRef<Value *>{Op0}, nullptr, "dx.any");
}
+ case Builtin::BI__builtin_hlsl_elementwise_clamp: {
+ Value *OpX = EmitScalarExpr(E->getArg(0));
+ Value *OpMin = EmitScalarExpr(E->getArg(1));
+ Value *OpMax = EmitScalarExpr(E->getArg(2));
+ return Builder.CreateIntrinsic(
+ /*ReturnType=*/OpX->getType(), Intrinsic::dx_clamp,
+ ArrayRef<Value *>{OpX, OpMin, OpMax}, nullptr, "dx.clamp");
+ }
case Builtin::BI__builtin_hlsl_dot: {
Value *Op0 = EmitScalarExpr(E->getArg(0));
Value *Op1 = EmitScalarExpr(E->getArg(1));
diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
index 718fb9a9b35c04..5e703772b7ee4f 100644
--- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h
+++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
@@ -252,6 +252,116 @@ double3 ceil(double3);
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_ceil)
double4 ceil(double4);
+//===----------------------------------------------------------------------===//
+// clamp builtins
+//===----------------------------------------------------------------------===//
+
+/// \fn T clamp(T X, T Min, T Max)
+/// \brief Clamps the specified value \a X to the specified
+/// minimum ( \a Min) and maximum ( \a Max) range.
+/// \param X A value to clamp.
+/// \param Min The specified minimum range.
+/// \param Max The specified maximum range.
+///
+/// \returns The clamped value for the \a X parameter.
+/// For values of -INF or INF, clamp will behave as expected.
+/// However, for values of NaN, the results are undefined.
+
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+half clamp(half, half, half);
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+half2 clamp(half2, half2, half2);
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+half3 clamp(half3, half3, half3);
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+half4 clamp(half4, half4, half4);
+
+#ifdef __HLSL_ENABLE_16_BIT
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+int16_t clamp(int16_t, int16_t, int16_t);
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+int16_t2 clamp(int16_t2, int16_t2, int16_t2);
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+int16_t3 clamp(int16_t3, int16_t3, int16_t3);
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+int16_t4 clamp(int16_t4, int16_t4, int16_t4);
+
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+uint16_t clamp(uint16_t, uint16_t, uint16_t);
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+uint16_t2 clamp(uint16_t2, uint16_t2, uint16_t2);
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+uint16_t3 clamp(uint16_t3, uint16_t3, uint16_t3);
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+uint16_t4 clamp(uint16_t4, uint16_t4, uint16_t4);
+#endif
+
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+int clamp(int, int, int);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+int2 clamp(int2, int2, int2);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+int3 clamp(int3, int3, int3);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+int4 clamp(int4, int4, int4);
+
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+uint clamp(uint, uint, uint);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+uint2 clamp(uint2, uint2, uint2);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+uint3 clamp(uint3, uint3, uint3);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+uint4 clamp(uint4, uint4, uint4);
+
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+int64_t clamp(int64_t, int64_t, int64_t);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+int64_t2 clamp(int64_t2, int64_t2, int64_t2);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+int64_t3 clamp(int64_t3, int64_t3, int64_t3);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+int64_t4 clamp(int64_t4, int64_t4, int64_t4);
+
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+uint64_t clamp(uint64_t, uint64_t, uint64_t);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+uint64_t2 clamp(uint64_t2, uint64_t2, uint64_t2);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+uint64_t3 clamp(uint64_t3, uint64_t3, uint64_t3);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+uint64_t4 clamp(uint64_t4, uint64_t4, uint64_t4);
+
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+float clamp(float, float, float);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+float2 clamp(float2, float2, float2);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+float3 clamp(float3, float3, float3);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+float4 clamp(float4, float4, float4);
+
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+double clamp(double, double, double);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+double2 clamp(double2, double2, double2);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+double3 clamp(double3, double3, double3);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+double4 clamp(double4, double4, double4);
+
//===----------------------------------------------------------------------===//
// cos builtins
//===----------------------------------------------------------------------===//
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index d88a38eb6eb97b..c47b47d4670e53 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -5303,6 +5303,17 @@ bool Sema::CheckHLSLBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
return true;
break;
}
+ case Builtin::BI__builtin_hlsl_elementwise_clamp: {
+ if (checkArgCount(*this, TheCall, 3))
+ return true;
+ if (CheckVectorElementCallArgs(this, TheCall))
+ return true;
+ if (SemaBuiltinElementwiseTernaryMath(
+ TheCall, /*CheckForFloatArgs*/
+ TheCall->getArg(0)->getType()->hasFloatingRepresentation()))
+ return true;
+ break;
+ }
case Builtin::BI__builtin_hlsl_dot: {
if (checkArgCount(*this, TheCall, 2))
return true;
@@ -5351,7 +5362,9 @@ bool Sema::CheckHLSLBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
return true;
if (CheckVectorElementCallArgs(this, TheCall))
return true;
- if (SemaBuiltinElementwiseTernaryMath(TheCall, /*CheckForFloatArgs*/ false))
+ if (SemaBuiltinElementwiseTernaryMath(
+ TheCall, /*CheckForFloatArgs*/
+ TheCall->getArg(0)->getType()->hasFloatingRepresentation()))
return true;
}
}
diff --git a/clang/test/CodeGenHLSL/builtins/clamp-builtin.hlsl b/clang/test/CodeGenHLSL/builtins/clamp-builtin.hlsl
new file mode 100644
index 00000000000000..e3ef26429e7e40
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/clamp-builtin.hlsl
@@ -0,0 +1,8 @@
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple dxil-pc-shadermodel6.3-library %s -fnative-half-type -emit-llvm -disable-llvm-passes -o - | FileCheck %s
+
+// CHECK-LABEL: builtin_test_clamp_int4
+// CHECK: %dx.clamp = call <4 x i32> @llvm.dx.clamp.v4i32(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2)
+// CHECK: ret <4 x i32> %dx.clamp
+int4 builtin_test_clamp_int4(int4 p0, int4 p1, int4 p2) {
+ return __builtin_hlsl_elementwise_clamp(p0, p1, p2);
+}
diff --git a/clang/test/CodeGenHLSL/builtins/clamp.hlsl b/clang/test/CodeGenHLSL/builtins/clamp.hlsl
new file mode 100644
index 00000000000000..3506beb2bd1faa
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/clamp.hlsl
@@ -0,0 +1,134 @@
+// RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \
+// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \
+// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \
+// RUN: --check-prefixes=CHECK,NATIVE_HALF
+// RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \
+// RUN: dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
+// RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF
+
+#ifdef __HLSL_ENABLE_16_BIT
+// NATIVE_HALF: define noundef i16 @
+// NATIVE_HALF: call i16 @llvm.dx.clamp.i16(
+int16_t test_clamp_short(int16_t p0, int16_t p1) { return clamp(p0, p1,p1); }
+// NATIVE_HALF: define noundef <2 x i16> @
+// NATIVE_HALF: call <2 x i16> @llvm.dx.clamp.v2i16(
+int16_t2 test_clamp_short2(int16_t2 p0, int16_t2 p1) { return clamp(p0, p1,p1); }
+// NATIVE_HALF: define noundef <3 x i16> @
+// NATIVE_HALF: call <3 x i16> @llvm.dx.clamp.v3i16
+int16_t3 test_clamp_short3(int16_t3 p0, int16_t3 p1) { return clamp(p0, p1,p1); }
+// NATIVE_HALF: define noundef <4 x i16> @
+// NATIVE_HALF: call <4 x i16> @llvm.dx.clamp.v4i16
+int16_t4 test_clamp_short4(int16_t4 p0, int16_t4 p1) { return clamp(p0, p1,p1); }
+
+// NATIVE_HALF: define noundef i16 @
+// NATIVE_HALF: call i16 @llvm.dx.clamp.i16(
+uint16_t test_clamp_ushort(uint16_t p0, uint16_t p1) { return clamp(p0, p1,p1); }
+// NATIVE_HALF: define noundef <2 x i16> @
+// NATIVE_HALF: call <2 x i16> @llvm.dx.clamp.v2i16
+uint16_t2 test_clamp_ushort2(uint16_t2 p0, uint16_t2 p1) { return clamp(p0, p1,p1); }
+// NATIVE_HALF: define noundef <3 x i16> @
+// NATIVE_HALF: call <3 x i16> @llvm.dx.clamp.v3i16
+uint16_t3 test_clamp_ushort3(uint16_t3 p0, uint16_t3 p1) { return clamp(p0, p1,p1); }
+// NATIVE_HALF: define noundef <4 x i16> @
+// NATIVE_HALF: call <4 x i16> @llvm.dx.clamp.v4i16
+uint16_t4 test_clamp_ushort4(uint16_t4 p0, uint16_t4 p1) { return clamp(p0, p1,p1); }
+#endif
+
+// CHECK: define noundef i32 @
+// CHECK: call i32 @llvm.dx.clamp.i32(
+int test_clamp_int(int p0, int p1) { return clamp(p0, p1,p1); }
+// CHECK: define noundef <2 x i32> @
+// CHECK: call <2 x i32> @llvm.dx.clamp.v2i32
+int2 test_clamp_int2(int2 p0, int2 p1) { return clamp(p0, p1,p1); }
+// CHECK: define noundef <3 x i32> @
+// CHECK: call <3 x i32> @llvm.dx.clamp.v3i32
+int3 test_clamp_int3(int3 p0, int3 p1) { return clamp(p0, p1,p1); }
+// CHECK: define noundef <4 x i32> @
+// CHECK: call <4 x i32> @llvm.dx.clamp.v4i32
+int4 test_clamp_int4(int4 p0, int4 p1) { return clamp(p0, p1,p1); }
+
+// CHECK: define noundef i32 @
+// CHECK: call i32 @llvm.dx.clamp.i32(
+int test_clamp_uint(uint p0, uint p1) { return clamp(p0, p1,p1); }
+// CHECK: define noundef <2 x i32> @
+// CHECK: call <2 x i32> @llvm.dx.clamp.v2i32
+uint2 test_clamp_uint2(uint2 p0, uint2 p1) { return clamp(p0, p1,p1); }
+// CHECK: define noundef <3 x i32> @
+// CHECK: call <3 x i32> @llvm.dx.clamp.v3i32
+uint3 test_clamp_uint3(uint3 p0, uint3 p1) { return clamp(p0, p1,p1); }
+// CHECK: define noundef <4 x i32> @
+// CHECK: call <4 x i32> @llvm.dx.clamp.v4i32
+uint4 test_clamp_uint4(uint4 p0, uint4 p1) { return clamp(p0, p1,p1); }
+
+// CHECK: define noundef i64 @
+// CHECK: call i64 @llvm.dx.clamp.i64(
+int64_t test_clamp_long(int64_t p0, int64_t p1) { return clamp(p0, p1,p1); }
+// CHECK: define noundef <2 x i64> @
+// CHECK: call <2 x i64> @llvm.dx.clamp.v2i64
+int64_t2 test_clamp_long2(int64_t2 p0, int64_t2 p1) { return clamp(p0, p1,p1); }
+// CHECK: define noundef <3 x i64> @
+// CHECK: call <3 x i64> @llvm.dx.clamp.v3i64
+int64_t3 test_clamp_long3(int64_t3 p0, int64_t3 p1) { return clamp(p0, p1,p1); }
+// CHECK: define noundef <4 x i64> @
+// CHECK: call <4 x i64> @llvm.dx.clamp.v4i64
+int64_t4 test_clamp_long4(int64_t4 p0, int64_t4 p1) { return clamp(p0, p1,p1); }
+
+// CHECK: define noundef i64 @
+// CHECK: call i64 @llvm.dx.clamp.i64(
+uint64_t test_clamp_long(uint64_t p0, uint64_t p1) { return clamp(p0, p1,p1); }
+// CHECK: define noundef <2 x i64> @
+// CHECK: call <2 x i64> @llvm.dx.clamp.v2i64
+uint64_t2 test_clamp_long2(uint64_t2 p0, uint64_t2 p1) { return clamp(p0, p1,p1); }
+// CHECK: define noundef <3 x i64> @
+// CHECK: call <3 x i64> @llvm.dx.clamp.v3i64
+uint64_t3 test_clamp_long3(uint64_t3 p0, uint64_t3 p1) { return clamp(p0, p1,p1); }
+// CHECK: define noundef <4 x i64> @
+// CHECK: call <4 x i64> @llvm.dx.clamp.v4i64
+uint64_t4 test_clamp_long4(uint64_t4 p0, uint64_t4 p1) { return clamp(p0, p1,p1); }
+
+// NATIVE_HALF: define noundef half @
+// NATIVE_HALF: call half @llvm.dx.clamp.f16(
+// NO_HALF: define noundef float @"?test_clamp_half
+// NO_HALF: call float @llvm.dx.clamp.f32(
+half test_clamp_half(half p0, half p1) { return clamp(p0, p1,p1); }
+// NATIVE_HALF: define noundef <2 x half> @
+// NATIVE_HALF: call <2 x half> @llvm.dx.clamp.v2f16
+// NO_HALF: define noundef <2 x float> @"?test_clamp_half2
+// NO_HALF: call <2 x float> @llvm.dx.clamp.v2f32(
+half2 test_clamp_half2(half2 p0, half2 p1) { return clamp(p0, p1,p1); }
+// NATIVE_HALF: define noundef <3 x half> @
+// NATIVE_HALF: call <3 x half> @llvm.dx.clamp.v3f16
+// NO_HALF: define noundef <3 x float> @"?test_clamp_half3
+// NO_HALF: call <3 x float> @llvm.dx.clamp.v3f32(
+half3 test_clamp_half3(half3 p0, half3 p1) { return clamp(p0, p1,p1); }
+// NATIVE_HALF: define noundef <4 x half> @
+// NATIVE_HALF: call <4 x half> @llvm.dx.clamp.v4f16
+// NO_HALF: define noundef <4 x float> @"?test_clamp_half4
+// NO_HALF: call <4 x float> @llvm.dx.clamp.v4f32(
+half4 test_clamp_half4(half4 p0, half4 p1) { return clamp(p0, p1,p1); }
+
+// CHECK: define noundef float @"?test_clamp_float
+// CHECK: call float @llvm.dx.clamp.f32(
+float test_clamp_float(float p0, float p1) { return clamp(p0, p1,p1); }
+// CHECK: define noundef <2 x float> @"?test_clamp_float2
+// CHECK: call <2 x float> @llvm.dx.clamp.v2f32
+float2 test_clamp_float2(float2 p0, float2 p1) { return clamp(p0, p1,p1); }
+// CHECK: define noundef <3 x float> @"?test_clamp_float3
+// CHECK: call <3 x float> @llvm.dx.clamp.v3f32
+float3 test_clamp_float3(float3 p0, float3 p1) { return clamp(p0, p1,p1); }
+// CHECK: define noundef <4 x float> @"?test_clamp_float4
+// CHECK: call <4 x float> @llvm.dx.clamp.v4f32
+float4 test_clamp_float4(float4 p0, float4 p1) { return clamp(p0, p1,p1); }
+
+// CHECK: define noundef double @
+// CHECK: call double @llvm.dx.clamp.f64(
+double test_clamp_double(double p0, double p1) { return clamp(p0, p1,p1); }
+// CHECK: define noundef <2 x double> @
+// CHECK: call <2 x double> @llvm.dx.clamp.v2f64
+double2 test_clamp_double2(double2 p0, double2 p1) { return clamp(p0, p1,p1); }
+// CHECK: define noundef <3 x double> @
+// CHECK: call <3 x double> @llvm.dx.clamp.v3f64
+double3 test_clamp_double3(double3 p0, double3 p1) { return clamp(p0, p1,p1); }
+// CHECK: define noundef <4 x double> @
+// CHECK: call <4 x double> @llvm.dx.clamp.v4f64
+double4 test_clamp_double4(double4 p0, double4 p1) { return clamp(p0, p1,p1); }
diff --git a/clang/test/SemaHLSL/BuiltIns/clamp-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/clamp-errors.hlsl
new file mode 100644
index 00000000000000..4c0e5315ce532e
--- /dev/null
+++ b/clang/test/SemaHLSL/BuiltIns/clamp-errors.hlsl
@@ -0,0 +1,91 @@
+// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm -disable-llvm-passes -verify -verify-ignore-unexpected
+
+float2 test_no_second_arg(float2 p0) {
+ return __builtin_hlsl_elementwise_clamp(p0);
+ // expected-error@-1 {{too few arguments to function call, expected 3, have 1}}
+}
+
+float2 test_no_third_arg(float2 p0) {
+ return __builtin_hlsl_elementwise_clamp(p0, p0);
+ // expected-error@-1 {{too few arguments to function call, expected 3, have 2}}
+}
+
+float2 test_too_many_arg(float2 p0) {
+ return __builtin_hlsl_elementwise_clamp(p0, p0, p0, p0);
+ // expected-error@-1 {{too many arguments to function call, expected 3, have 4}}
+}
+
+float2 test_clamp_no_second_arg(float2 p0) {
+ return clamp(p0);
+ // expected-error@-1 {{no matching function for call to 'clamp'}}
+}
+
+float2 test_clamp_vector_size_mismatch(float3 p0, float2 p1) {
+ return clamp(p0, p0, p1);
+ // expected-warning@-1 {{implicit conversion truncates vector: 'float3' (aka 'vector<float, 3>') to 'float __attribute__((ext_vector_type(2)))' (vector of 2 'float' values)}}
+}
+
+float2 test_clamp_builtin_vector_size_mismatch(float3 p0, float2 p1) {
+ return __builtin_hlsl_elementwise_clamp(p0, p1, p1);
+ // expected-error@-1 {{all arguments to '__builtin_hlsl_elementwise_clamp' must have the same type}}
+}
+
+float test_clamp_scalar_mismatch(float p0, half p1) {
+ return clamp(p1, p0, p1);
+ // expected-error@-1 {{call to 'clamp' is ambiguous}}
+}
+
+float2 test_clamp_element_type_mismatch(half2 p0, float2 p1) {
+ return clamp(p1, p0, p1);
+ // expected-error@-1 {{call to 'clamp' is ambiguous}}
+}
+
+float2 test_builtin_clamp_float2_splat(float p0, float2 p1) {
+ return __builtin_hlsl_elementwise_clamp(p0, p1, p1);
+ // expected-error@-1 {{all arguments to '__builtin_hlsl_elementwise_clamp' must be vectors}}
+}
+
+float3 test_builtin_clamp_float3_splat(float p0, float3 p1) {
+ return __builtin_hlsl_elementwise_clamp(p0, p1, p1);
+ // expected-error@-1 {{all arguments to '__builtin_hlsl_elementwise_clamp' must be vectors}}
+}
+
+float4 test_builtin_clamp_float4_splat(float p0, float4 p1) {
+ return __builtin_hlsl_elementwise_clamp(p0, p1, p1);
+ // expected-error@-1 {{all arguments to '__builtin_hlsl_elementwise_clamp' must be vectors}}
+}
+
+float2 test_clamp_float2_int_splat(float2 p0, int p1) {
+ return __builtin_hlsl_elementwise_clamp(p0, p1, p1);
+ // expected-error@-1 {{all arguments to '__builtin_hlsl_elementwise_clamp' must be vectors}}
+}
+
+float3 test_clamp_float3_int_splat(float3 p0, int p1) {
+ return __builtin_hlsl_elementwise_clamp(p0, p1, p1);
+ // expected-error@-1 {{all arguments to '__builtin_hlsl_elementwise_clamp' must be vectors}}
+}
+
+float2 test_builtin_clamp_int_vect_to_float_vec_promotion(int2 p0, float p1) {
+ return __builtin_hlsl_elementwise_clamp(p0, p1, p1);
+ // expected-error@-1 {{all arguments to '__builtin_hlsl_elementwise_clamp' must be vectors}}
+}
+
+float test_builtin_clamp_bool_type_promotion(bool p0) {
+ return __builtin_hlsl_elementwise_clamp(p0, p0, p0);
+ // expected-error@-1 {{1st argument must be a vector, integer or floating point type (was 'bool')}}
+}
+
+float builtin_bool_to_float_type_promotion(float p0, bool p1) {
+ return __builtin_hlsl_elementwise_clamp(p0, p0, p1);
+ // expected-error@-1 {{3rd argument must be a floating point type (was 'bool')}}
+}
+
+float builtin_bool_to_float_type_promotion2(bool p0, float p1) {
+ return __builtin_hlsl_elementwise_clamp(p1, p0, p1);
+ // expected-error@-1 {{2nd argument must be a floating point type (was 'bool')}}
+}
+
+float builtin_clamp_int_to_float_promotion(float p0, int p1) {
+ return __builtin_hlsl_elementwise_clamp(p0, p0, p1);
+ // expected-error@-1 {{3rd argument must be a floating point type (was 'int')}}
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td
index 00536c71c3e2e5..c717063f7c8581 100644
--- a/llvm/include/llvm/IR/IntrinsicsDirectX.td
+++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -21,7 +21,7 @@ def int_dx_create_handle : ClangBuiltin<"__builtin_hlsl_create_handle">,
Intrinsic<[ llvm_ptr_ty ], [llvm_i8_ty], [IntrWillReturn]>;
def int_dx_any : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_any_ty]>;
-
+def int_dx_clamp : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>;
def int_dx_dot :
Intrinsic<[LLVMVectorElementType<0>],
[llvm_anyvector_ty, LLVMScalarOrSameVectorWidth<0, LLVMVectorElementType<0>>],
diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index b0e587d2e7eacf..216fa5b10c8f4d 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -274,8 +274,18 @@ def Round : DXILOpMapping<26, unary, int_round,
"Returns the input rounded to the nearest integer"
"within a floating-point type.",
[llvm_halforfloat_ty, LLVMMatchType<0>]>;
+def FMax : DXILOpMapping<35, binary, int_maxnum,
+ "Float maximum. FMax(a,b) = a > b ? a : b">;
+def FMin : DXILOpMapping<36, binary, int_minnum,
+ "Float minimum. FMin(a,b) = a < b ? a : b">;
+def SMax : DXILOpMapping<37, binary, int_smax,
+ "Signed integer maximum. SMax(a,b) = a > b ? a : b">;
+def SMin : DXILOpMapping<38, binary, int_smin,
+ "Signed integer minimum. SMin(a,b) = a < b ? a : b">;
def UMax : DXILOpMapping<39, binary, int_umax,
"Unsigned integer maximum. UMax(a,b) = a > b ? a : b">;
+def UMin : DXILOpMapping<40, binary, int_umin,
+ "Unsigned integer minimum. UMin(a,b) = a < b ? a : b">;
def FMad : DXILOpMapping<46, tertiary, int_fmuladd,
"Floating point arithmetic multiply/add operation. fmad(m,a,b) = m * a + b.">;
def IMad : DXILOpMapping<48, tertiary, int_dx_imad,
diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
index 0461f0490017bf..bd20861ef20d5d 100644
--- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
+++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
@@ -35,6 +35,7 @@ static bool isIntrinsicExpansion(Function &F) {
switch (F.getIntrinsicID()) {
case Intrinsic::exp:
case Intrinsic::dx_any:
+ case Intrinsic::dx_clamp:
case Intrinsic::dx_lerp:
case Intrinsic::dx_rcp:
return true;
@@ -132,12 +133,59 @@ static bool expandRcpIntrinsic(CallInst *Orig) {
return true;
}
+static Intrinsic::IndependentIntrinsics
+getCorrectMaxIntrinsic(Type *elemTy) {
+ if(elemTy->isVectorTy())
+ elemTy = elemTy->getScalarType();
+ if (elemTy->isIntegerTy()) {
+ const llvm::IntegerType* intType = llvm::cast<llvm::IntegerType>(elemTy);
+ if (intType->getSignBit())
+ return Intrinsic::smax;
+ return Intrinsic::umax;
+ }
+ assert(elemTy->isFloatingPointTy());
+ return Intrinsic::maxnum;
+}
+
+static Intrinsic::IndependentIntrinsics
+getCorrectMinIntrinsic(Type *elemTy) {
+ if(elemTy->isVectorTy())
+ elemTy = elemTy->getScalarType();
+ if (elemTy->isIntegerTy()) {
+ const llvm::IntegerType* intType = llvm::cast<llvm::IntegerType>(elemTy);
+ if (intType->getSignBit())
+ return Intrinsic::smin;
+ return Intrinsic::umin;
+ }
+ assert(elemTy->isFloatingPointTy());
+ return Intrinsic::minnum;
+}
+
+static bool expandClampIntrinsic(CallInst *Orig) {
+ Value *X = Orig->getOperand(0);
+ Value *Min = Orig->getOperand(1);
+ Value *Max = Orig->getOperand(2);
+ Type *Ty = X->getType();
+ IRBuilder<> Builder(Orig->getParent());
+ Builder.SetInsertPoint(Orig);
+ auto *MaxCall =
+ Builder.CreateIntrinsic(Ty, getCorrectMaxIntrinsic(Ty), {X, Min}, nullptr, "dx.max");
+ auto *MinCall =
+ Builder.CreateIntrinsic(Ty, getCorrectMinIntrinsic(Ty), {MaxCall, Max}, nullptr, "dx.min");
+
+ Orig->replaceAllUsesWith(MinCall);
+ Orig->eraseFromParent();
+ return true;
+}
+
static bool expandIntrinsic(Function &F, CallInst *Orig) {
switch (F.getIntrinsicID()) {
case Intrinsic::exp:
return expandExpIntrinsic(Orig);
case Intrinsic::dx_any:
return expandAnyIntrinsic(Orig);
+ case Intrinsic::dx_clamp:
+ return expandClampIntrinsic(Orig);
case Intrinsic::dx_lerp:
return expandLerpIntrinsic(Orig);
case Intrinsic::dx_rcp:
diff --git a/llvm/test/CodeGen/DirectX/clamp.ll b/llvm/test/CodeGen/DirectX/clamp.ll
new file mode 100644
index 00000000000000..8c7d845eb00af9
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/clamp.ll
@@ -0,0 +1,64 @@
+; RUN: opt -S -dxil-op-lower < %s | FileCheck %s
+
+; Make sure dxil operation function calls for clamp are generated for i16/i32/i64.
+
+; CHECK-LABEL:test_clamp_i16
+define noundef i16 @test_clamp_i16(i16 noundef %a, i16 noundef %b, i16 noundef %c) {
+entry:
+; CHECK: call i16 @dx.op.binary.i16(i32 37, i16 %{{.*}}, i16 %{{.*}})
+; CHECK: call i16 @dx.op.binary.i16(i32 38, i16 %{{.*}}, i16 %{{.*}})
+ %0 = call i16 @llvm.dx.clamp.i16(i16 %a, i16 %b, i16 %c)
+ ret i16 %0
+}
+
+; CHECK-LABEL:test_clamp_i32
+define noundef i32 @test_clamp_i32(i32 noundef %a, i32 noundef %b, i32 noundef %c) {
+entry:
+; CHECK: call i32 @dx.op.binary.i32(i32 37, i32 %{{.*}}, i32 %{{.*}})
+; CHECK: call i32 @dx.op.binary.i32(i32 38, i32 %{{.*}}, i32 %{{.*}})
+ %0 = call i32 @llvm.dx.clamp.i32(i32 %a, i32 %b, i32 %c)
+ ret i32 %0
+}
+
+; CHECK-LABEL:test_clamp_i64
+define noundef i64 @test_clamp_i64(i64 noundef %a, i64 noundef %b, i64 noundef %c) {
+entry:
+; CHECK: call i64 @dx.op.binary.i64(i32 37, i64 %a, i64 %b)
+; CHECK: call i64 @dx.op.binary.i64(i32 38, i64 %{{.*}}, i64 %c)
+ %0 = call i64 @llvm.dx.clamp.i64(i64 %a, i64 %b, i64 %c)
+ ret i64 %0
+}
+
+; CHECK-LABEL:test_clamp_half
+define noundef half @test_clamp_half(half noundef %a, half noundef %b, half noundef %c) {
+entry:
+; CHECK: call half @dx.op.binary.f16(i32 35, half %{{.*}}, half %{{.*}})
+; CHECK: call half @dx.op.binary.f16(i32 36, half %{{.*}}, half %{{.*}})
+ %0 = call half @llvm.dx.clamp.f16(half %a, half %b, half %c)
+ ret half %0
+}
+
+; CHECK-LABEL:test_clamp_float
+define noundef float @test_clamp_float(float noundef %a, float noundef %b, float noundef %c) {
+entry:
+; CHECK: call float @dx.op.binary.f32(i32 35, float %{{.*}}, float %{{.*}})
+; CHECK: call float @dx.op.binary.f32(i32 36, float %{{.*}}, float %{{.*}})
+ %0 = call float @llvm.dx.clamp.f32(float %a, float %b, float %c)
+ ret float %0
+}
+
+; CHECK-LABEL:test_clamp_double
+define noundef double @test_clamp_double(double noundef %a, double noundef %b, double noundef %c) {
+entry:
+; CHECK: call double @dx.op.binary.f64(i32 35, double %{{.*}}, double %{{.*}})
+; CHECK: call double @dx.op.binary.f64(i32 36, double %{{.*}}, double %{{.*}})
+ %0 = call double @llvm.dx.clamp.f64(double %a, double %b, double %c)
+ ret double %0
+}
+
+declare half @llvm.dx.clamp.f16(half, half, half)
+declare float @llvm.dx.clamp.f32(float, float, float)
+declare double @llvm.dx.clamp.f64(double, double, double)
+declare i16 @llvm.dx.clamp.i16(i16, i16, i16)
+declare i32 @llvm.dx.clamp.i32(i32, i32, i32)
+declare i64 @llvm.dx.clamp.i64(i64, i64, i64)
diff --git a/llvm/test/CodeGen/DirectX/fmax.ll b/llvm/test/CodeGen/DirectX/fmax.ll
new file mode 100644
index 00000000000000..aff722c29309c0
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/fmax.ll
@@ -0,0 +1,31 @@
+; RUN: opt -S -dxil-op-lower < %s | FileCheck %s
+
+; Make sure dxil operation function calls for fmax are generated for half/float/double.
+
+; CHECK-LABEL:test_fmax_half
+define noundef half @test_fmax_half(half noundef %a, half noundef %b) {
+entry:
+; CHECK: call half @dx.op.binary.f16(i32 35, half %{{.*}}, half %{{.*}})
+ %0 = call half @llvm.maxnum.f16(half %a, half %b)
+ ret half %0
+}
+
+; CHECK-LABEL:test_fmax_float
+define noundef float @test_fmax_float(float noundef %a, float noundef %b) {
+entry:
+; CHECK: call float @dx.op.binary.f32(i32 35, float %{{.*}}, float %{{.*}})
+ %0 = call float @llvm.maxnum.f32(float %a, float %b)
+ ret float %0
+}
+
+; CHECK-LABEL:test_fmax_double
+define noundef double @test_fmax_double(double noundef %a, double noundef %b) {
+entry:
+; CHECK: call double @dx.op.binary.f64(i32 35, double %{{.*}}, double %{{.*}})
+ %0 = call double @llvm.maxnum.f64(double %a, double %b)
+ ret double %0
+}
+
+declare half @llvm.maxnum.f16(half, half)
+declare float @llvm.maxnum.f32(float, float)
+declare double @llvm.maxnum.f64(double, double)
diff --git a/llvm/test/CodeGen/DirectX/fmin.ll b/llvm/test/CodeGen/DirectX/fmin.ll
new file mode 100644
index 00000000000000..2f7c209f0278ae
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/fmin.ll
@@ -0,0 +1,31 @@
+; RUN: opt -S -dxil-op-lower < %s | FileCheck %s
+
+; Make sure dxil operation function calls for fmin are generated for half/float/double.
+
+; CHECK-LABEL:test_fmin_half
+define noundef half @test_fmin_half(half noundef %a, half noundef %b) {
+entry:
+; CHECK: call half @dx.op.binary.f16(i32 36, half %{{.*}}, half %{{.*}})
+ %0 = call half @llvm.minnum.f16(half %a, half %b)
+ ret half %0
+}
+
+; CHECK-LABEL:test_fmin_float
+define noundef float @test_fmin_float(float noundef %a, float noundef %b) {
+entry:
+; CHECK: call float @dx.op.binary.f32(i32 36, float %{{.*}}, float %{{.*}})
+ %0 = call float @llvm.minnum.f32(float %a, float %b)
+ ret float %0
+}
+
+; CHECK-LABEL:test_fmin_double
+define noundef double @test_fmin_double(double noundef %a, double noundef %b) {
+entry:
+; CHECK: call double @dx.op.binary.f64(i32 36, double %{{.*}}, double %{{.*}})
+ %0 = call double @llvm.minnum.f64(double %a, double %b)
+ ret double %0
+}
+
+declare half @llvm.minnum.f16(half, half)
+declare float @llvm.minnum.f32(float, float)
+declare double @llvm.minnum.f64(double, double)
diff --git a/llvm/test/CodeGen/DirectX/smax.ll b/llvm/test/CodeGen/DirectX/smax.ll
new file mode 100644
index 00000000000000..8b2406782c0938
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/smax.ll
@@ -0,0 +1,31 @@
+; RUN: opt -S -dxil-op-lower < %s | FileCheck %s
+
+; Make sure dxil operation function calls for smax are generated for i16/i32/i64.
+
+; CHECK-LABEL:test_smax_i16
+define noundef i16 @test_smax_i16(i16 noundef %a, i16 noundef %b) {
+entry:
+; CHECK: call i16 @dx.op.binary.i16(i32 37, i16 %{{.*}}, i16 %{{.*}})
+ %0 = call i16 @llvm.smax.i16(i16 %a, i16 %b)
+ ret i16 %0
+}
+
+; CHECK-LABEL:test_smax_i32
+define noundef i32 @test_smax_i32(i32 noundef %a, i32 noundef %b) {
+entry:
+; CHECK: call i32 @dx.op.binary.i32(i32 37, i32 %{{.*}}, i32 %{{.*}})
+ %0 = call i32 @llvm.smax.i32(i32 %a, i32 %b)
+ ret i32 %0
+}
+
+; CHECK-LABEL:test_smax_i64
+define noundef i64 @test_smax_i64(i64 noundef %a, i64 noundef %b) {
+entry:
+; CHECK: call i64 @dx.op.binary.i64(i32 37, i64 %{{.*}}, i64 %{{.*}})
+ %0 = call i64 @llvm.smax.i64(i64 %a, i64 %b)
+ ret i64 %0
+}
+
+declare i16 @llvm.smax.i16(i16, i16)
+declare i32 @llvm.smax.i32(i32, i32)
+declare i64 @llvm.smax.i64(i64, i64)
diff --git a/llvm/test/CodeGen/DirectX/smin.ll b/llvm/test/CodeGen/DirectX/smin.ll
new file mode 100644
index 00000000000000..b2b40a1b624335
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/smin.ll
@@ -0,0 +1,31 @@
+; RUN: opt -S -dxil-op-lower < %s | FileCheck %s
+
+; Make sure dxil operation function calls for smin are generated for i16/i32/i64.
+
+; CHECK-LABEL:test_smin_i16
+define noundef i16 @test_smin_i16(i16 noundef %a, i16 noundef %b) {
+entry:
+; CHECK: call i16 @dx.op.binary.i16(i32 38, i16 %{{.*}}, i16 %{{.*}})
+ %0 = call i16 @llvm.smin.i16(i16 %a, i16 %b)
+ ret i16 %0
+}
+
+; CHECK-LABEL:test_smin_i32
+define noundef i32 @test_smin_i32(i32 noundef %a, i32 noundef %b) {
+entry:
+; CHECK: call i32 @dx.op.binary.i32(i32 38, i32 %{{.*}}, i32 %{{.*}})
+ %0 = call i32 @llvm.smin.i32(i32 %a, i32 %b)
+ ret i32 %0
+}
+
+; CHECK-LABEL:test_smin_i64
+define noundef i64 @test_smin_i64(i64 noundef %a, i64 noundef %b) {
+entry:
+; CHECK: call i64 @dx.op.binary.i64(i32 38, i64 %{{.*}}, i64 %{{.*}})
+ %0 = call i64 @llvm.smin.i64(i64 %a, i64 %b)
+ ret i64 %0
+}
+
+declare i16 @llvm.smin.i16(i16, i16)
+declare i32 @llvm.smin.i32(i32, i32)
+declare i64 @llvm.smin.i64(i64, i64)
diff --git a/llvm/test/CodeGen/DirectX/umax.ll b/llvm/test/CodeGen/DirectX/umax.ll
index c7b6a87599279a..be0f557fc8da69 100644
--- a/llvm/test/CodeGen/DirectX/umax.ll
+++ b/llvm/test/CodeGen/DirectX/umax.ll
@@ -1,30 +1,31 @@
; RUN: opt -S -dxil-op-lower < %s | FileCheck %s
-; Make sure dxil operation function calls for umax are generated for i32/i64.
+; Make sure dxil operation function calls for umax are generated for i16/i32/i64.
-target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64"
-target triple = "dxil-pc-shadermodel6.7-library"
+; CHECK-LABEL:test_umax_i16
+define noundef i16 @test_umax_i16(i16 noundef %a, i16 noundef %b) {
+entry:
+; CHECK: call i16 @dx.op.binary.i16(i32 39, i16 %{{.*}}, i16 %{{.*}})
+ %0 = call i16 @llvm.umax.i16(i16 %a, i16 %b)
+ ret i16 %0
+}
; CHECK-LABEL:test_umax_i32
-; Function Attrs: noinline nounwind optnone
-define noundef i32 @test_umax_i32(i32 noundef %a, i32 noundef %b) #0 {
+define noundef i32 @test_umax_i32(i32 noundef %a, i32 noundef %b) {
entry:
-; CHECK:call i32 @dx.op.binary.i32(i32 39, i32 %{{.*}}, i32 %{{.*}})
+; CHECK: call i32 @dx.op.binary.i32(i32 39, i32 %{{.*}}, i32 %{{.*}})
%0 = call i32 @llvm.umax.i32(i32 %a, i32 %b)
ret i32 %0
}
; CHECK-LABEL:test_umax_i64
-define noundef i64 @test_umax_i64(i64 noundef %a, i64 noundef %b) #0 {
+define noundef i64 @test_umax_i64(i64 noundef %a, i64 noundef %b) {
entry:
-; CHECK:call i64 @dx.op.binary.i64(i32 39, i64 %{{.*}}, i64 %{{.*}})
+; CHECK: call i64 @dx.op.binary.i64(i32 39, i64 %{{.*}}, i64 %{{.*}})
%0 = call i64 @llvm.umax.i64(i64 %a, i64 %b)
ret i64 %0
}
-; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn
-declare i32 @llvm.umax.i32(i32, i32) #1
-declare i64 @llvm.umax.i64(i64, i64) #1
-
-attributes #0 = { noinline nounwind }
-attributes #1 = { nocallback nofree nosync nounwind readnone speculatable willreturn }
+declare i16 @llvm.umax.i16(i16, i16)
+declare i32 @llvm.umax.i32(i32, i32)
+declare i64 @llvm.umax.i64(i64, i64)
diff --git a/llvm/test/CodeGen/DirectX/umin.ll b/llvm/test/CodeGen/DirectX/umin.ll
new file mode 100644
index 00000000000000..5051c711744892
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/umin.ll
@@ -0,0 +1,31 @@
+; RUN: opt -S -dxil-op-lower < %s | FileCheck %s
+
+; Make sure dxil operation function calls for umin are generated for i16/i32/i64.
+
+; CHECK-LABEL:test_umin_i16
+define noundef i16 @test_umin_i16(i16 noundef %a, i16 noundef %b) {
+entry:
+; CHECK: call i16 @dx.op.binary.i16(i32 40, i16 %{{.*}}, i16 %{{.*}})
+ %0 = call i16 @llvm.umin.i16(i16 %a, i16 %b)
+ ret i16 %0
+}
+
+; CHECK-LABEL:test_umin_i32
+define noundef i32 @test_umin_i32(i32 noundef %a, i32 noundef %b) {
+entry:
+; CHECK: call i32 @dx.op.binary.i32(i32 40, i32 %{{.*}}, i32 %{{.*}})
+ %0 = call i32 @llvm.umin.i32(i32 %a, i32 %b)
+ ret i32 %0
+}
+
+; CHECK-LABEL:test_umin_i64
+define noundef i64 @test_umin_i64(i64 noundef %a, i64 noundef %b) {
+entry:
+; CHECK: call i64 @dx.op.binary.i64(i32 40, i64 %{{.*}}, i64 %{{.*}})
+ %0 = call i64 @llvm.umin.i64(i64 %a, i64 %b)
+ ret i64 %0
+}
+
+declare i16 @llvm.umin.i16(i16, i16)
+declare i32 @llvm.umin.i32(i32, i32)
+declare i64 @llvm.umin.i64(i64, i64)
From afe3e6dce778ae440d1ec51753e4982d4ab9b60b Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <farzonlotfi at microsoft.com>
Date: Fri, 15 Mar 2024 02:30:54 -0400
Subject: [PATCH 2/2] [HLSL] implement `clamp` intrinsic closes #70071 -
`CGBuiltin.cpp` - Add the unsigned/generic clamp intrinsic emitter. -
`IntrinsicsDirectX.td` - add the `dx.clamp` & `dx.uclamp` intrinsics -
`DXILIntrinsicExpansion.cpp` - add the `clamp` instruction expansion while
maintaining vector form. - `SemaChecking.cpp` - Add `clamp` builtin Sema
Checks. - `Builtins.td` - add a `clamp` builtin - `hlsl_intrinsics.h` - add
the `clamp` api Why `clamp` as instruction expansion for DXIL? 1. SPIR-V has
a GLSL `clamp` extension via: -
[FClamp](https://registry.khronos.org/SPIR-V/specs/1.0/GLSL.std.450.html#FClamp)
-
[UClamp](https://registry.khronos.org/SPIR-V/specs/1.0/GLSL.std.450.html#UClamp)
-
[SClamp](https://registry.khronos.org/SPIR-V/specs/1.0/GLSL.std.450.html#SClamp)
2. Further Clamp lowers to `min(max( x, min_range ), max_range)` which we
have float, signed, and unsigned DXIL ops for
Commit-specific changes:
- Support calling the unsigned DXIL ops.
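As a rough illustration of the unsigned path added in this commit (values are illustrative; it mirrors the `clamp-vec.ll` checks below):

```llvm
; clang now emits dx.uclamp when the element type is unsigned...
%dx.clamp = call <4 x i32> @llvm.dx.uclamp.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c)

; ...and the expansion pass rewrites it with the unsigned max/min intrinsics,
; which -dxil-op-lower later maps to the UMax (39) and UMin (40) DXIL ops.
%dx.max = call <4 x i32> @llvm.umax.v4i32(<4 x i32> %a, <4 x i32> %b)
%dx.min = call <4 x i32> @llvm.umin.v4i32(<4 x i32> %dx.max, <4 x i32> %c)
```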
---
clang/lib/CodeGen/CGBuiltin.cpp | 9 ++-
clang/test/CodeGenHLSL/builtins/clamp.hlsl | 24 +++---
clang/test/SemaHLSL/BuiltIns/mad-errors.hlsl | 6 +-
llvm/include/llvm/IR/IntrinsicsDirectX.td | 3 +-
.../Target/DirectX/DXILIntrinsicExpansion.cpp | 48 ++++++------
llvm/test/CodeGen/DirectX/clamp-vec.ll | 74 +++++++++++++++++++
llvm/test/CodeGen/DirectX/clamp.ll | 32 +++++++-
7 files changed, 155 insertions(+), 41 deletions(-)
create mode 100644 llvm/test/CodeGen/DirectX/clamp-vec.ll
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index f831694fe9bc23..77fb1bc0f4eb3a 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -17985,8 +17985,15 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
Value *OpX = EmitScalarExpr(E->getArg(0));
Value *OpMin = EmitScalarExpr(E->getArg(1));
Value *OpMax = EmitScalarExpr(E->getArg(2));
+
+ QualType Ty = E->getArg(0)->getType();
+ bool IsUnsigned = false;
+ if (auto *VecTy = Ty->getAs<VectorType>())
+ Ty = VecTy->getElementType();
+ IsUnsigned = Ty->isUnsignedIntegerType();
return Builder.CreateIntrinsic(
- /*ReturnType=*/OpX->getType(), Intrinsic::dx_clamp,
+ /*ReturnType=*/OpX->getType(),
+ IsUnsigned ? Intrinsic::dx_uclamp : Intrinsic::dx_clamp,
ArrayRef<Value *>{OpX, OpMin, OpMax}, nullptr, "dx.clamp");
}
case Builtin::BI__builtin_hlsl_dot: {
diff --git a/clang/test/CodeGenHLSL/builtins/clamp.hlsl b/clang/test/CodeGenHLSL/builtins/clamp.hlsl
index 3506beb2bd1faa..029e48ffe25865 100644
--- a/clang/test/CodeGenHLSL/builtins/clamp.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/clamp.hlsl
@@ -21,16 +21,16 @@ int16_t3 test_clamp_short3(int16_t3 p0, int16_t3 p1) { return clamp(p0, p1,p1);
int16_t4 test_clamp_short4(int16_t4 p0, int16_t4 p1) { return clamp(p0, p1,p1); }
// NATIVE_HALF: define noundef i16 @
-// NATIVE_HALF: call i16 @llvm.dx.clamp.i16(
+// NATIVE_HALF: call i16 @llvm.dx.uclamp.i16(
uint16_t test_clamp_ushort(uint16_t p0, uint16_t p1) { return clamp(p0, p1,p1); }
// NATIVE_HALF: define noundef <2 x i16> @
-// NATIVE_HALF: call <2 x i16> @llvm.dx.clamp.v2i16
+// NATIVE_HALF: call <2 x i16> @llvm.dx.uclamp.v2i16
uint16_t2 test_clamp_ushort2(uint16_t2 p0, uint16_t2 p1) { return clamp(p0, p1,p1); }
// NATIVE_HALF: define noundef <3 x i16> @
-// NATIVE_HALF: call <3 x i16> @llvm.dx.clamp.v3i16
+// NATIVE_HALF: call <3 x i16> @llvm.dx.uclamp.v3i16
uint16_t3 test_clamp_ushort3(uint16_t3 p0, uint16_t3 p1) { return clamp(p0, p1,p1); }
// NATIVE_HALF: define noundef <4 x i16> @
-// NATIVE_HALF: call <4 x i16> @llvm.dx.clamp.v4i16
+// NATIVE_HALF: call <4 x i16> @llvm.dx.uclamp.v4i16
uint16_t4 test_clamp_ushort4(uint16_t4 p0, uint16_t4 p1) { return clamp(p0, p1,p1); }
#endif
@@ -48,16 +48,16 @@ int3 test_clamp_int3(int3 p0, int3 p1) { return clamp(p0, p1,p1); }
int4 test_clamp_int4(int4 p0, int4 p1) { return clamp(p0, p1,p1); }
// CHECK: define noundef i32 @
-// CHECK: call i32 @llvm.dx.clamp.i32(
+// CHECK: call i32 @llvm.dx.uclamp.i32(
int test_clamp_uint(uint p0, uint p1) { return clamp(p0, p1,p1); }
// CHECK: define noundef <2 x i32> @
-// CHECK: call <2 x i32> @llvm.dx.clamp.v2i32
+// CHECK: call <2 x i32> @llvm.dx.uclamp.v2i32
uint2 test_clamp_uint2(uint2 p0, uint2 p1) { return clamp(p0, p1,p1); }
// CHECK: define noundef <3 x i32> @
-// CHECK: call <3 x i32> @llvm.dx.clamp.v3i32
+// CHECK: call <3 x i32> @llvm.dx.uclamp.v3i32
uint3 test_clamp_uint3(uint3 p0, uint3 p1) { return clamp(p0, p1,p1); }
// CHECK: define noundef <4 x i32> @
-// CHECK: call <4 x i32> @llvm.dx.clamp.v4i32
+// CHECK: call <4 x i32> @llvm.dx.uclamp.v4i32
uint4 test_clamp_uint4(uint4 p0, uint4 p1) { return clamp(p0, p1,p1); }
// CHECK: define noundef i64 @
@@ -74,16 +74,16 @@ int64_t3 test_clamp_long3(int64_t3 p0, int64_t3 p1) { return clamp(p0, p1,p1); }
int64_t4 test_clamp_long4(int64_t4 p0, int64_t4 p1) { return clamp(p0, p1,p1); }
// CHECK: define noundef i64 @
-// CHECK: call i64 @llvm.dx.clamp.i64(
+// CHECK: call i64 @llvm.dx.uclamp.i64(
uint64_t test_clamp_long(uint64_t p0, uint64_t p1) { return clamp(p0, p1,p1); }
// CHECK: define noundef <2 x i64> @
-// CHECK: call <2 x i64> @llvm.dx.clamp.v2i64
+// CHECK: call <2 x i64> @llvm.dx.uclamp.v2i64
uint64_t2 test_clamp_long2(uint64_t2 p0, uint64_t2 p1) { return clamp(p0, p1,p1); }
// CHECK: define noundef <3 x i64> @
-// CHECK: call <3 x i64> @llvm.dx.clamp.v3i64
+// CHECK: call <3 x i64> @llvm.dx.uclamp.v3i64
uint64_t3 test_clamp_long3(uint64_t3 p0, uint64_t3 p1) { return clamp(p0, p1,p1); }
// CHECK: define noundef <4 x i64> @
-// CHECK: call <4 x i64> @llvm.dx.clamp.v4i64
+// CHECK: call <4 x i64> @llvm.dx.uclamp.v4i64
uint64_t4 test_clamp_long4(uint64_t4 p0, uint64_t4 p1) { return clamp(p0, p1,p1); }
// NATIVE_HALF: define noundef half @
diff --git a/clang/test/SemaHLSL/BuiltIns/mad-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/mad-errors.hlsl
index 0b6843591455bd..97ce931bf1b5b5 100644
--- a/clang/test/SemaHLSL/BuiltIns/mad-errors.hlsl
+++ b/clang/test/SemaHLSL/BuiltIns/mad-errors.hlsl
@@ -72,15 +72,15 @@ float2 test_builtin_mad_int_vect_to_float_vec_promotion(int2 p0, float p1) {
float builtin_bool_to_float_type_promotion(float p0, bool p1) {
return __builtin_hlsl_mad(p0, p0, p1);
- // expected-error@-1 {{3rd argument must be a vector, integer or floating point type (was 'bool')}}
+ // expected-error@-1 {{3rd argument must be a floating point type (was 'bool')}}
}
float builtin_bool_to_float_type_promotion2(bool p0, float p1) {
return __builtin_hlsl_mad(p1, p0, p1);
- // expected-error@-1 {{2nd argument must be a vector, integer or floating point type (was 'bool')}}
+ // expected-error@-1 {{2nd argument must be a floating point type (was 'bool')}}
}
float builtin_mad_int_to_float_promotion(float p0, int p1) {
return __builtin_hlsl_mad(p0, p0, p1);
- // expected-error@-1 {{arguments are of different types ('double' vs 'int')}}
+ // expected-error@-1 {{3rd argument must be a floating point type (was 'int')}}
}
diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td
index c717063f7c8581..5c72f06f96ed12 100644
--- a/llvm/include/llvm/IR/IntrinsicsDirectX.td
+++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -21,7 +21,8 @@ def int_dx_create_handle : ClangBuiltin<"__builtin_hlsl_create_handle">,
Intrinsic<[ llvm_ptr_ty ], [llvm_i8_ty], [IntrWillReturn]>;
def int_dx_any : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_any_ty]>;
-def int_dx_clamp : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>;
+def int_dx_clamp : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>;
+def int_dx_uclamp : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>;
def int_dx_dot :
Intrinsic<[LLVMVectorElementType<0>],
[llvm_anyvector_ty, LLVMScalarOrSameVectorWidth<0, LLVMVectorElementType<0>>],
diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
index bd20861ef20d5d..441111a7ce7ca9 100644
--- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
+++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
@@ -36,6 +36,7 @@ static bool isIntrinsicExpansion(Function &F) {
case Intrinsic::exp:
case Intrinsic::dx_any:
case Intrinsic::dx_clamp:
+ case Intrinsic::dx_uclamp:
case Intrinsic::dx_lerp:
case Intrinsic::dx_rcp:
return true;
@@ -133,35 +134,33 @@ static bool expandRcpIntrinsic(CallInst *Orig) {
return true;
}
-static Intrinsic::IndependentIntrinsics
-getCorrectMaxIntrinsic(Type *elemTy) {
- if(elemTy->isVectorTy())
- elemTy = elemTy->getScalarType();
- if (elemTy->isIntegerTy()) {
- const llvm::IntegerType* intType = llvm::cast<llvm::IntegerType>(elemTy);
- if (intType->getSignBit())
- return Intrinsic::smax;
+static Intrinsic::ID getCorrectMaxIntrinsic(Type *elemTy,
+ Intrinsic::ID clampIntrinsic) {
+ if (clampIntrinsic == Intrinsic::dx_uclamp)
return Intrinsic::umax;
- }
+ assert(clampIntrinsic == Intrinsic::dx_clamp);
+ if (elemTy->isVectorTy())
+ elemTy = elemTy->getScalarType();
+ if (elemTy->isIntegerTy())
+ return Intrinsic::smax;
assert(elemTy->isFloatingPointTy());
return Intrinsic::maxnum;
}
-static Intrinsic::IndependentIntrinsics
-getCorrectMinIntrinsic(Type *elemTy) {
- if(elemTy->isVectorTy())
- elemTy = elemTy->getScalarType();
- if (elemTy->isIntegerTy()) {
- const llvm::IntegerType* intType = llvm::cast<llvm::IntegerType>(elemTy);
- if (intType->getSignBit())
- return Intrinsic::smin;
+static Intrinsic::ID getCorrectMinIntrinsic(Type *elemTy,
+ Intrinsic::ID clampIntrinsic) {
+ if (clampIntrinsic == Intrinsic::dx_uclamp)
return Intrinsic::umin;
- }
+ assert(clampIntrinsic == Intrinsic::dx_clamp);
+ if (elemTy->isVectorTy())
+ elemTy = elemTy->getScalarType();
+ if (elemTy->isIntegerTy())
+ return Intrinsic::smin;
assert(elemTy->isFloatingPointTy());
return Intrinsic::minnum;
}
-static bool expandClampIntrinsic(CallInst *Orig) {
+static bool expandClampIntrinsic(CallInst *Orig, Intrinsic::ID clampIntrinsic) {
Value *X = Orig->getOperand(0);
Value *Min = Orig->getOperand(1);
Value *Max = Orig->getOperand(2);
@@ -169,10 +168,12 @@ static bool expandClampIntrinsic(CallInst *Orig) {
IRBuilder<> Builder(Orig->getParent());
Builder.SetInsertPoint(Orig);
auto *MaxCall =
- Builder.CreateIntrinsic(Ty, getCorrectMaxIntrinsic(Ty), {X, Min}, nullptr, "dx.max");
+ Builder.CreateIntrinsic(Ty, getCorrectMaxIntrinsic(Ty, clampIntrinsic),
+ {X, Min}, nullptr, "dx.max");
auto *MinCall =
- Builder.CreateIntrinsic(Ty, getCorrectMinIntrinsic(Ty), {MaxCall, Max}, nullptr, "dx.min");
-
+ Builder.CreateIntrinsic(Ty, getCorrectMinIntrinsic(Ty, clampIntrinsic),
+ {MaxCall, Max}, nullptr, "dx.min");
+
Orig->replaceAllUsesWith(MinCall);
Orig->eraseFromParent();
return true;
@@ -184,8 +185,9 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) {
return expandExpIntrinsic(Orig);
case Intrinsic::dx_any:
return expandAnyIntrinsic(Orig);
+ case Intrinsic::dx_uclamp:
case Intrinsic::dx_clamp:
- return expandClampIntrinsic(Orig);
+ return expandClampIntrinsic(Orig, F.getIntrinsicID());
case Intrinsic::dx_lerp:
return expandLerpIntrinsic(Orig);
case Intrinsic::dx_rcp:
diff --git a/llvm/test/CodeGen/DirectX/clamp-vec.ll b/llvm/test/CodeGen/DirectX/clamp-vec.ll
new file mode 100644
index 00000000000000..d4f33a18b71573
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/clamp-vec.ll
@@ -0,0 +1,74 @@
+; RUN: opt -S -dxil-intrinsic-expansion < %s | FileCheck %s
+
+; Make sure clamp intrinsics are expanded to the matching max/min intrinsics for float/int/uint vectors.
+
+; CHECK-LABEL: clamp_half3
+define noundef <3 x half> @clamp_half3(<3 x half> noundef %a, <3 x half> noundef %b, <3 x half> noundef %c) {
+entry:
+ ; CHECK: call <3 x half> @llvm.maxnum.v3f16(<3 x half> %a, <3 x half> %b)
+ ; CHECK: call <3 x half> @llvm.minnum.v3f16(<3 x half> %{{.*}}, <3 x half> %c)
+ %dx.clamp = call <3 x half> @llvm.dx.clamp.v3f16(<3 x half> %a, <3 x half> %b, <3 x half> %c)
+ ret <3 x half> %dx.clamp
+}
+
+; CHECK-LABEL: clamp_float4
+define noundef <4 x float> @clamp_float4(<4 x float> noundef %a, <4 x float> noundef %b, <4 x float> noundef %c) {
+entry:
+ ; CHECK: call <4 x float> @llvm.maxnum.v4f32(<4 x float> %a, <4 x float> %b)
+ ; CHECK: call <4 x float> @llvm.minnum.v4f32(<4 x float> %{{.*}}, <4 x float> %c)
+ %dx.clamp = call <4 x float> @llvm.dx.clamp.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c)
+ ret <4 x float> %dx.clamp
+}
+
+; CHECK-LABEL: clamp_double2
+define noundef <2 x double> @clamp_double2(<2 x double> noundef %a, <2 x double> noundef %b, <2 x double> noundef %c) {
+entry:
+ ; CHECK: call <2 x double> @llvm.maxnum.v2f64(<2 x double> %a, <2 x double> %b)
+ ; CHECK: call <2 x double> @llvm.minnum.v2f64(<2 x double> %{{.*}}, <2 x double> %c)
+ %dx.clamp = call <2 x double> @llvm.dx.clamp.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c)
+ ret <2 x double> %dx.clamp
+}
+
+; CHECK-LABEL: clamp_int4
+define noundef <4 x i32> @clamp_int4(<4 x i32> noundef %a, <4 x i32> noundef %b, <4 x i32> noundef %c) {
+entry:
+ ; CHECK: call <4 x i32> @llvm.smax.v4i32(<4 x i32> %a, <4 x i32> %b)
+ ; CHECK: call <4 x i32> @llvm.smin.v4i32(<4 x i32> %{{.*}}, <4 x i32> %c)
+ %dx.clamp = call <4 x i32> @llvm.dx.clamp.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c)
+ ret <4 x i32> %dx.clamp
+}
+
+; CHECK-LABEL: clamp_uint16_t3
+define noundef <3 x i16> @clamp_uint16_t3(<3 x i16> noundef %a, <3 x i16> noundef %b, <3 x i16> noundef %c) {
+entry:
+ ; CHECK: call <3 x i16> @llvm.umax.v3i16(<3 x i16> %a, <3 x i16> %b)
+ ; CHECK: call <3 x i16> @llvm.umin.v3i16(<3 x i16> %{{.*}}, <3 x i16> %c)
+ %dx.clamp = call <3 x i16> @llvm.dx.uclamp.v3i16(<3 x i16> %a, <3 x i16> %b, <3 x i16> %c)
+ ret <3 x i16> %dx.clamp
+}
+
+; CHECK-LABEL: clamp_uint4
+define noundef <4 x i32> @clamp_uint4(<4 x i32> noundef %a, <4 x i32> noundef %b, <4 x i32> noundef %c) {
+entry:
+ ; CHECK: call <4 x i32> @llvm.umax.v4i32(<4 x i32> %a, <4 x i32> %b)
+ ; CHECK: call <4 x i32> @llvm.umin.v4i32(<4 x i32> %{{.*}}, <4 x i32> %c)
+ %dx.clamp = call <4 x i32> @llvm.dx.uclamp.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c)
+ ret <4 x i32> %dx.clamp
+}
+
+; CHECK-LABEL: clamp_uint64_t4
+define noundef <2 x i64> @clamp_uint64_t4(<2 x i64> noundef %a, <2 x i64> noundef %b, <2 x i64> noundef %c) {
+entry:
+ ; CHECK: call <2 x i64> @llvm.umax.v2i64(<2 x i64> %a, <2 x i64> %b)
+ ; CHECK: call <2 x i64> @llvm.umin.v2i64(<2 x i64> %{{.*}}, <2 x i64> %c)
+ %dx.clamp = call <2 x i64> @llvm.dx.uclamp.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c)
+ ret <2 x i64> %dx.clamp
+}
+
+declare <3 x half> @llvm.dx.clamp.v3f16(<3 x half>, <3 x half>, <3 x half>)
+declare <4 x float> @llvm.dx.clamp.v4f32(<4 x float>, <4 x float>, <4 x float>)
+declare <2 x double> @llvm.dx.clamp.v2f64(<2 x double>, <2 x double>, <2 x double>)
+declare <4 x i32> @llvm.dx.clamp.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
+declare <3 x i16> @llvm.dx.uclamp.v3i16(<3 x i16>, <3 x i16>, <3 x i16>)
+declare <4 x i32> @llvm.dx.uclamp.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
+declare <2 x i64> @llvm.dx.uclamp.v2i64(<2 x i64>, <2 x i64>, <2 x i64>)
diff --git a/llvm/test/CodeGen/DirectX/clamp.ll b/llvm/test/CodeGen/DirectX/clamp.ll
index 8c7d845eb00af9..f122313b8d7dcc 100644
--- a/llvm/test/CodeGen/DirectX/clamp.ll
+++ b/llvm/test/CodeGen/DirectX/clamp.ll
@@ -1,6 +1,6 @@
; RUN: opt -S -dxil-op-lower < %s | FileCheck %s
-; Make sure dxil operation function calls for clamp are generated for i16/i32/i64.
+; Make sure dxil operation function calls for clamp/uclamp are generated for half/float/double/i16/i32/i64.
; CHECK-LABEL:test_clamp_i16
define noundef i16 @test_clamp_i16(i16 noundef %a, i16 noundef %b, i16 noundef %c) {
@@ -56,9 +56,39 @@ entry:
ret double %0
}
+; CHECK-LABEL:test_uclamp_i16
+define noundef i16 @test_uclamp_i16(i16 noundef %a, i16 noundef %b, i16 noundef %c) {
+entry:
+; CHECK: call i16 @dx.op.binary.i16(i32 39, i16 %{{.*}}, i16 %{{.*}})
+; CHECK: call i16 @dx.op.binary.i16(i32 40, i16 %{{.*}}, i16 %{{.*}})
+ %0 = call i16 @llvm.dx.uclamp.i16(i16 %a, i16 %b, i16 %c)
+ ret i16 %0
+}
+
+; CHECK-LABEL:test_uclamp_i32
+define noundef i32 @test_uclamp_i32(i32 noundef %a, i32 noundef %b, i32 noundef %c) {
+entry:
+; CHECK: call i32 @dx.op.binary.i32(i32 39, i32 %{{.*}}, i32 %{{.*}})
+; CHECK: call i32 @dx.op.binary.i32(i32 40, i32 %{{.*}}, i32 %{{.*}})
+ %0 = call i32 @llvm.dx.uclamp.i32(i32 %a, i32 %b, i32 %c)
+ ret i32 %0
+}
+
+; CHECK-LABEL:test_uclamp_i64
+define noundef i64 @test_uclamp_i64(i64 noundef %a, i64 noundef %b, i64 noundef %c) {
+entry:
+; CHECK: call i64 @dx.op.binary.i64(i32 39, i64 %a, i64 %b)
+; CHECK: call i64 @dx.op.binary.i64(i32 40, i64 %{{.*}}, i64 %c)
+ %0 = call i64 @llvm.dx.uclamp.i64(i64 %a, i64 %b, i64 %c)
+ ret i64 %0
+}
+
declare half @llvm.dx.clamp.f16(half, half, half)
declare float @llvm.dx.clamp.f32(float, float, float)
declare double @llvm.dx.clamp.f64(double, double, double)
declare i16 @llvm.dx.clamp.i16(i16, i16, i16)
declare i32 @llvm.dx.clamp.i32(i32, i32, i32)
declare i64 @llvm.dx.clamp.i64(i64, i64, i64)
+declare i16 @llvm.dx.uclamp.i16(i16, i16, i16)
+declare i32 @llvm.dx.uclamp.i32(i32, i32, i32)
+declare i64 @llvm.dx.uclamp.i64(i64, i64, i64)