[clang] [llvm] [HLSL] implement `clamp` intrinsic (PR #85424)
Farzon Lotfi via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 15 09:26:24 PDT 2024
https://github.com/farzonl created https://github.com/llvm/llvm-project/pull/85424
closes #70071
- `CGBuiltin.cpp` - Add the unsigned/generic clamp intrinsic emitter.
- `IntrinsicsDirectX.td` - Add the `dx.clamp` & `dx.uclamp` intrinsics.
- `DXILIntrinsicExpansion.cpp` - Add the `clamp` instruction expansion while maintaining vector form.
- `SemaChecking.cpp` - Add `clamp` builtin Sema checks.
- `Builtins.td` - Add a `clamp` builtin.
- `hlsl_intrinsics.h` - Add the `clamp` API.
Why implement `clamp` as an instruction expansion for DXIL?
1. SPIR-V has a GLSL `clamp` extension via:
- [FClamp](https://registry.khronos.org/SPIR-V/specs/1.0/GLSL.std.450.html#FClamp)
- [UClamp](https://registry.khronos.org/SPIR-V/specs/1.0/GLSL.std.450.html#UClamp)
- [SClamp](https://registry.khronos.org/SPIR-V/specs/1.0/GLSL.std.450.html#SClamp)
2. Further, `clamp` lowers to `min(max(x, min_range), max_range)`, for which we have float, signed, and unsigned DXIL ops (a sketch of the expansion follows below).
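For illustration, a minimal sketch of the intended lowering for a scalar signed-integer clamp (value names are illustrative, not taken from the tests):

```llvm
; Before DXILIntrinsicExpansion: the generic clamp intrinsic emitted for the builtin.
%r = call i32 @llvm.dx.clamp.i32(i32 %x, i32 %lo, i32 %hi)

; After expansion: max against the lower bound, then min against the upper bound.
%dx.max = call i32 @llvm.smax.i32(i32 %x, i32 %lo)
%dx.min = call i32 @llvm.smin.i32(i32 %dx.max, i32 %hi)
```

Floating-point element types use `maxnum`/`minnum` instead, and `-dxil-op-lower` then maps these generic intrinsics onto the corresponding DXIL binary ops.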
From 681f4bbbc4aba08e285864ded62a7f01e178bf38 Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <farzonlotfi at microsoft.com>
Date: Thu, 14 Mar 2024 15:26:26 -0400
Subject: [PATCH 1/2] [HLSL] Implement the intrinsic
---
clang/include/clang/Basic/Builtins.td | 6 +
clang/lib/CodeGen/CGBuiltin.cpp | 8 ++
clang/lib/Headers/hlsl/hlsl_intrinsics.h | 110 ++++++++++++++
clang/lib/Sema/SemaChecking.cpp | 15 +-
.../CodeGenHLSL/builtins/clamp-builtin.hlsl | 8 ++
clang/test/CodeGenHLSL/builtins/clamp.hlsl | 134 ++++++++++++++++++
.../test/SemaHLSL/BuiltIns/clamp-errors.hlsl | 91 ++++++++++++
llvm/include/llvm/IR/IntrinsicsDirectX.td | 2 +-
llvm/lib/Target/DirectX/DXIL.td | 10 ++
.../Target/DirectX/DXILIntrinsicExpansion.cpp | 48 +++++++
llvm/test/CodeGen/DirectX/clamp.ll | 64 +++++++++
llvm/test/CodeGen/DirectX/fmax.ll | 31 ++++
llvm/test/CodeGen/DirectX/fmin.ll | 31 ++++
llvm/test/CodeGen/DirectX/smax.ll | 31 ++++
llvm/test/CodeGen/DirectX/smin.ll | 31 ++++
llvm/test/CodeGen/DirectX/umax.ll | 29 ++--
llvm/test/CodeGen/DirectX/umin.ll | 31 ++++
17 files changed, 664 insertions(+), 16 deletions(-)
create mode 100644 clang/test/CodeGenHLSL/builtins/clamp-builtin.hlsl
create mode 100644 clang/test/CodeGenHLSL/builtins/clamp.hlsl
create mode 100644 clang/test/SemaHLSL/BuiltIns/clamp-errors.hlsl
create mode 100644 llvm/test/CodeGen/DirectX/clamp.ll
create mode 100644 llvm/test/CodeGen/DirectX/fmax.ll
create mode 100644 llvm/test/CodeGen/DirectX/fmin.ll
create mode 100644 llvm/test/CodeGen/DirectX/smax.ll
create mode 100644 llvm/test/CodeGen/DirectX/smin.ll
create mode 100644 llvm/test/CodeGen/DirectX/umin.ll
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index 58a2d22e7641fc..64599aaee0ced7 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -4554,6 +4554,12 @@ def HLSLWaveActiveCountBits : LangBuiltin<"HLSL_LANG"> {
let Prototype = "unsigned int(bool)";
}
+def HLSLClamp : LangBuiltin<"HLSL_LANG"> {
+ let Spellings = ["__builtin_hlsl_elementwise_clamp"];
+ let Attributes = [NoThrow, Const];
+ let Prototype = "void(...)";
+}
+
def HLSLCreateHandle : LangBuiltin<"HLSL_LANG"> {
let Spellings = ["__builtin_hlsl_create_handle"];
let Attributes = [NoThrow, Const];
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index b09bf563622089..f831694fe9bc23 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -17981,6 +17981,14 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
/*ReturnType=*/llvm::Type::getInt1Ty(getLLVMContext()),
Intrinsic::dx_any, ArrayRef<Value *>{Op0}, nullptr, "dx.any");
}
+ case Builtin::BI__builtin_hlsl_elementwise_clamp: {
+ Value *OpX = EmitScalarExpr(E->getArg(0));
+ Value *OpMin = EmitScalarExpr(E->getArg(1));
+ Value *OpMax = EmitScalarExpr(E->getArg(2));
+ return Builder.CreateIntrinsic(
+ /*ReturnType=*/OpX->getType(), Intrinsic::dx_clamp,
+ ArrayRef<Value *>{OpX, OpMin, OpMax}, nullptr, "dx.clamp");
+ }
case Builtin::BI__builtin_hlsl_dot: {
Value *Op0 = EmitScalarExpr(E->getArg(0));
Value *Op1 = EmitScalarExpr(E->getArg(1));
diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
index 718fb9a9b35c04..5e703772b7ee4f 100644
--- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h
+++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
@@ -252,6 +252,116 @@ double3 ceil(double3);
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_ceil)
double4 ceil(double4);
+//===----------------------------------------------------------------------===//
+// clamp builtins
+//===----------------------------------------------------------------------===//
+
+/// \fn T clamp(T X, T Min, T Max)
+/// \brief Clamps the specified value \a X to the specified
+/// minimum ( \a Min) and maximum ( \a Max) range.
+/// \param X A value to clamp.
+/// \param Min The specified minimum range.
+/// \param Max The specified maximum range.
+///
+/// \returns The clamped value for the \a X parameter.
+/// For values of -INF or INF, clamp will behave as expected.
+/// However, for values of NaN, the results are undefined.
+
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+half clamp(half, half, half);
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+half2 clamp(half2, half2, half2);
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+half3 clamp(half3, half3, half3);
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+half4 clamp(half4, half4, half4);
+
+#ifdef __HLSL_ENABLE_16_BIT
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+int16_t clamp(int16_t, int16_t, int16_t);
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+int16_t2 clamp(int16_t2, int16_t2, int16_t2);
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+int16_t3 clamp(int16_t3, int16_t3, int16_t3);
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+int16_t4 clamp(int16_t4, int16_t4, int16_t4);
+
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+uint16_t clamp(uint16_t, uint16_t, uint16_t);
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+uint16_t2 clamp(uint16_t2, uint16_t2, uint16_t2);
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+uint16_t3 clamp(uint16_t3, uint16_t3, uint16_t3);
+_HLSL_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+uint16_t4 clamp(uint16_t4, uint16_t4, uint16_t4);
+#endif
+
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+int clamp(int, int, int);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+int2 clamp(int2, int2, int2);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+int3 clamp(int3, int3, int3);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+int4 clamp(int4, int4, int4);
+
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+uint clamp(uint, uint, uint);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+uint2 clamp(uint2, uint2, uint2);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+uint3 clamp(uint3, uint3, uint3);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+uint4 clamp(uint4, uint4, uint4);
+
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+int64_t clamp(int64_t, int64_t, int64_t);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+int64_t2 clamp(int64_t2, int64_t2, int64_t2);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+int64_t3 clamp(int64_t3, int64_t3, int64_t3);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+int64_t4 clamp(int64_t4, int64_t4, int64_t4);
+
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+uint64_t clamp(uint64_t, uint64_t, uint64_t);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+uint64_t2 clamp(uint64_t2, uint64_t2, uint64_t2);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+uint64_t3 clamp(uint64_t3, uint64_t3, uint64_t3);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+uint64_t4 clamp(uint64_t4, uint64_t4, uint64_t4);
+
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+float clamp(float, float, float);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+float2 clamp(float2, float2, float2);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+float3 clamp(float3, float3, float3);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+float4 clamp(float4, float4, float4);
+
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+double clamp(double, double, double);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+double2 clamp(double2, double2, double2);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+double3 clamp(double3, double3, double3);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_clamp)
+double4 clamp(double4, double4, double4);
+
//===----------------------------------------------------------------------===//
// cos builtins
//===----------------------------------------------------------------------===//
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index d88a38eb6eb97b..c47b47d4670e53 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -5303,6 +5303,17 @@ bool Sema::CheckHLSLBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
return true;
break;
}
+ case Builtin::BI__builtin_hlsl_elementwise_clamp: {
+ if (checkArgCount(*this, TheCall, 3))
+ return true;
+ if (CheckVectorElementCallArgs(this, TheCall))
+ return true;
+ if (SemaBuiltinElementwiseTernaryMath(
+ TheCall, /*CheckForFloatArgs*/
+ TheCall->getArg(0)->getType()->hasFloatingRepresentation()))
+ return true;
+ break;
+ }
case Builtin::BI__builtin_hlsl_dot: {
if (checkArgCount(*this, TheCall, 2))
return true;
@@ -5351,7 +5362,9 @@ bool Sema::CheckHLSLBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
return true;
if (CheckVectorElementCallArgs(this, TheCall))
return true;
- if (SemaBuiltinElementwiseTernaryMath(TheCall, /*CheckForFloatArgs*/ false))
+ if (SemaBuiltinElementwiseTernaryMath(
+ TheCall, /*CheckForFloatArgs*/
+ TheCall->getArg(0)->getType()->hasFloatingRepresentation()))
return true;
}
}
diff --git a/clang/test/CodeGenHLSL/builtins/clamp-builtin.hlsl b/clang/test/CodeGenHLSL/builtins/clamp-builtin.hlsl
new file mode 100644
index 00000000000000..e3ef26429e7e40
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/clamp-builtin.hlsl
@@ -0,0 +1,8 @@
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple dxil-pc-shadermodel6.3-library %s -fnative-half-type -emit-llvm -disable-llvm-passes -o - | FileCheck %s
+
+// CHECK-LABEL: builtin_test_clamp_int4
+// CHECK: %dx.clamp = call <4 x i32> @llvm.dx.clamp.v4i32(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2)
+// CHECK: ret <4 x i32> %dx.clamp
+int4 builtin_test_clamp_int4(int4 p0, int4 p1, int4 p2) {
+ return __builtin_hlsl_elementwise_clamp(p0, p1, p2);
+}
diff --git a/clang/test/CodeGenHLSL/builtins/clamp.hlsl b/clang/test/CodeGenHLSL/builtins/clamp.hlsl
new file mode 100644
index 00000000000000..3506beb2bd1faa
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/clamp.hlsl
@@ -0,0 +1,134 @@
+// RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \
+// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \
+// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \
+// RUN: --check-prefixes=CHECK,NATIVE_HALF
+// RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \
+// RUN: dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
+// RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF
+
+#ifdef __HLSL_ENABLE_16_BIT
+// NATIVE_HALF: define noundef i16 @
+// NATIVE_HALF: call i16 @llvm.dx.clamp.i16(
+int16_t test_clamp_short(int16_t p0, int16_t p1) { return clamp(p0, p1,p1); }
+// NATIVE_HALF: define noundef <2 x i16> @
+// NATIVE_HALF: call <2 x i16> @llvm.dx.clamp.v2i16(
+int16_t2 test_clamp_short2(int16_t2 p0, int16_t2 p1) { return clamp(p0, p1,p1); }
+// NATIVE_HALF: define noundef <3 x i16> @
+// NATIVE_HALF: call <3 x i16> @llvm.dx.clamp.v3i16
+int16_t3 test_clamp_short3(int16_t3 p0, int16_t3 p1) { return clamp(p0, p1,p1); }
+// NATIVE_HALF: define noundef <4 x i16> @
+// NATIVE_HALF: call <4 x i16> @llvm.dx.clamp.v4i16
+int16_t4 test_clamp_short4(int16_t4 p0, int16_t4 p1) { return clamp(p0, p1,p1); }
+
+// NATIVE_HALF: define noundef i16 @
+// NATIVE_HALF: call i16 @llvm.dx.clamp.i16(
+uint16_t test_clamp_ushort(uint16_t p0, uint16_t p1) { return clamp(p0, p1,p1); }
+// NATIVE_HALF: define noundef <2 x i16> @
+// NATIVE_HALF: call <2 x i16> @llvm.dx.clamp.v2i16
+uint16_t2 test_clamp_ushort2(uint16_t2 p0, uint16_t2 p1) { return clamp(p0, p1,p1); }
+// NATIVE_HALF: define noundef <3 x i16> @
+// NATIVE_HALF: call <3 x i16> @llvm.dx.clamp.v3i16
+uint16_t3 test_clamp_ushort3(uint16_t3 p0, uint16_t3 p1) { return clamp(p0, p1,p1); }
+// NATIVE_HALF: define noundef <4 x i16> @
+// NATIVE_HALF: call <4 x i16> @llvm.dx.clamp.v4i16
+uint16_t4 test_clamp_ushort4(uint16_t4 p0, uint16_t4 p1) { return clamp(p0, p1,p1); }
+#endif
+
+// CHECK: define noundef i32 @
+// CHECK: call i32 @llvm.dx.clamp.i32(
+int test_clamp_int(int p0, int p1) { return clamp(p0, p1,p1); }
+// CHECK: define noundef <2 x i32> @
+// CHECK: call <2 x i32> @llvm.dx.clamp.v2i32
+int2 test_clamp_int2(int2 p0, int2 p1) { return clamp(p0, p1,p1); }
+// CHECK: define noundef <3 x i32> @
+// CHECK: call <3 x i32> @llvm.dx.clamp.v3i32
+int3 test_clamp_int3(int3 p0, int3 p1) { return clamp(p0, p1,p1); }
+// CHECK: define noundef <4 x i32> @
+// CHECK: call <4 x i32> @llvm.dx.clamp.v4i32
+int4 test_clamp_int4(int4 p0, int4 p1) { return clamp(p0, p1,p1); }
+
+// CHECK: define noundef i32 @
+// CHECK: call i32 @llvm.dx.clamp.i32(
+int test_clamp_uint(uint p0, uint p1) { return clamp(p0, p1,p1); }
+// CHECK: define noundef <2 x i32> @
+// CHECK: call <2 x i32> @llvm.dx.clamp.v2i32
+uint2 test_clamp_uint2(uint2 p0, uint2 p1) { return clamp(p0, p1,p1); }
+// CHECK: define noundef <3 x i32> @
+// CHECK: call <3 x i32> @llvm.dx.clamp.v3i32
+uint3 test_clamp_uint3(uint3 p0, uint3 p1) { return clamp(p0, p1,p1); }
+// CHECK: define noundef <4 x i32> @
+// CHECK: call <4 x i32> @llvm.dx.clamp.v4i32
+uint4 test_clamp_uint4(uint4 p0, uint4 p1) { return clamp(p0, p1,p1); }
+
+// CHECK: define noundef i64 @
+// CHECK: call i64 @llvm.dx.clamp.i64(
+int64_t test_clamp_long(int64_t p0, int64_t p1) { return clamp(p0, p1,p1); }
+// CHECK: define noundef <2 x i64> @
+// CHECK: call <2 x i64> @llvm.dx.clamp.v2i64
+int64_t2 test_clamp_long2(int64_t2 p0, int64_t2 p1) { return clamp(p0, p1,p1); }
+// CHECK: define noundef <3 x i64> @
+// CHECK: call <3 x i64> @llvm.dx.clamp.v3i64
+int64_t3 test_clamp_long3(int64_t3 p0, int64_t3 p1) { return clamp(p0, p1,p1); }
+// CHECK: define noundef <4 x i64> @
+// CHECK: call <4 x i64> @llvm.dx.clamp.v4i64
+int64_t4 test_clamp_long4(int64_t4 p0, int64_t4 p1) { return clamp(p0, p1,p1); }
+
+// CHECK: define noundef i64 @
+// CHECK: call i64 @llvm.dx.clamp.i64(
+uint64_t test_clamp_long(uint64_t p0, uint64_t p1) { return clamp(p0, p1,p1); }
+// CHECK: define noundef <2 x i64> @
+// CHECK: call <2 x i64> @llvm.dx.clamp.v2i64
+uint64_t2 test_clamp_long2(uint64_t2 p0, uint64_t2 p1) { return clamp(p0, p1,p1); }
+// CHECK: define noundef <3 x i64> @
+// CHECK: call <3 x i64> @llvm.dx.clamp.v3i64
+uint64_t3 test_clamp_long3(uint64_t3 p0, uint64_t3 p1) { return clamp(p0, p1,p1); }
+// CHECK: define noundef <4 x i64> @
+// CHECK: call <4 x i64> @llvm.dx.clamp.v4i64
+uint64_t4 test_clamp_long4(uint64_t4 p0, uint64_t4 p1) { return clamp(p0, p1,p1); }
+
+// NATIVE_HALF: define noundef half @
+// NATIVE_HALF: call half @llvm.dx.clamp.f16(
+// NO_HALF: define noundef float @"?test_clamp_half
+// NO_HALF: call float @llvm.dx.clamp.f32(
+half test_clamp_half(half p0, half p1) { return clamp(p0, p1,p1); }
+// NATIVE_HALF: define noundef <2 x half> @
+// NATIVE_HALF: call <2 x half> @llvm.dx.clamp.v2f16
+// NO_HALF: define noundef <2 x float> @"?test_clamp_half2
+// NO_HALF: call <2 x float> @llvm.dx.clamp.v2f32(
+half2 test_clamp_half2(half2 p0, half2 p1) { return clamp(p0, p1,p1); }
+// NATIVE_HALF: define noundef <3 x half> @
+// NATIVE_HALF: call <3 x half> @llvm.dx.clamp.v3f16
+// NO_HALF: define noundef <3 x float> @"?test_clamp_half3
+// NO_HALF: call <3 x float> @llvm.dx.clamp.v3f32(
+half3 test_clamp_half3(half3 p0, half3 p1) { return clamp(p0, p1,p1); }
+// NATIVE_HALF: define noundef <4 x half> @
+// NATIVE_HALF: call <4 x half> @llvm.dx.clamp.v4f16
+// NO_HALF: define noundef <4 x float> @"?test_clamp_half4
+// NO_HALF: call <4 x float> @llvm.dx.clamp.v4f32(
+half4 test_clamp_half4(half4 p0, half4 p1) { return clamp(p0, p1,p1); }
+
+// CHECK: define noundef float @"?test_clamp_float
+// CHECK: call float @llvm.dx.clamp.f32(
+float test_clamp_float(float p0, float p1) { return clamp(p0, p1,p1); }
+// CHECK: define noundef <2 x float> @"?test_clamp_float2
+// CHECK: call <2 x float> @llvm.dx.clamp.v2f32
+float2 test_clamp_float2(float2 p0, float2 p1) { return clamp(p0, p1,p1); }
+// CHECK: define noundef <3 x float> @"?test_clamp_float3
+// CHECK: call <3 x float> @llvm.dx.clamp.v3f32
+float3 test_clamp_float3(float3 p0, float3 p1) { return clamp(p0, p1,p1); }
+// CHECK: define noundef <4 x float> @"?test_clamp_float4
+// CHECK: call <4 x float> @llvm.dx.clamp.v4f32
+float4 test_clamp_float4(float4 p0, float4 p1) { return clamp(p0, p1,p1); }
+
+// CHECK: define noundef double @
+// CHECK: call double @llvm.dx.clamp.f64(
+double test_clamp_double(double p0, double p1) { return clamp(p0, p1,p1); }
+// CHECK: define noundef <2 x double> @
+// CHECK: call <2 x double> @llvm.dx.clamp.v2f64
+double2 test_clamp_double2(double2 p0, double2 p1) { return clamp(p0, p1,p1); }
+// CHECK: define noundef <3 x double> @
+// CHECK: call <3 x double> @llvm.dx.clamp.v3f64
+double3 test_clamp_double3(double3 p0, double3 p1) { return clamp(p0, p1,p1); }
+// CHECK: define noundef <4 x double> @
+// CHECK: call <4 x double> @llvm.dx.clamp.v4f64
+double4 test_clamp_double4(double4 p0, double4 p1) { return clamp(p0, p1,p1); }
diff --git a/clang/test/SemaHLSL/BuiltIns/clamp-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/clamp-errors.hlsl
new file mode 100644
index 00000000000000..4c0e5315ce532e
--- /dev/null
+++ b/clang/test/SemaHLSL/BuiltIns/clamp-errors.hlsl
@@ -0,0 +1,91 @@
+// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm -disable-llvm-passes -verify -verify-ignore-unexpected
+
+float2 test_no_second_arg(float2 p0) {
+ return __builtin_hlsl_elementwise_clamp(p0);
+ // expected-error@-1 {{too few arguments to function call, expected 3, have 1}}
+}
+
+float2 test_no_third_arg(float2 p0) {
+ return __builtin_hlsl_elementwise_clamp(p0, p0);
+ // expected-error@-1 {{too few arguments to function call, expected 3, have 2}}
+}
+
+float2 test_too_many_arg(float2 p0) {
+ return __builtin_hlsl_elementwise_clamp(p0, p0, p0, p0);
+ // expected-error@-1 {{too many arguments to function call, expected 3, have 4}}
+}
+
+float2 test_clamp_no_second_arg(float2 p0) {
+ return clamp(p0);
+ // expected-error@-1 {{no matching function for call to 'clamp'}}
+}
+
+float2 test_clamp_vector_size_mismatch(float3 p0, float2 p1) {
+ return clamp(p0, p0, p1);
+ // expected-warning@-1 {{implicit conversion truncates vector: 'float3' (aka 'vector<float, 3>') to 'float __attribute__((ext_vector_type(2)))' (vector of 2 'float' values)}}
+}
+
+float2 test_clamp_builtin_vector_size_mismatch(float3 p0, float2 p1) {
+ return __builtin_hlsl_elementwise_clamp(p0, p1, p1);
+ // expected-error@-1 {{all arguments to '__builtin_hlsl_elementwise_clamp' must have the same type}}
+}
+
+float test_clamp_scalar_mismatch(float p0, half p1) {
+ return clamp(p1, p0, p1);
+ // expected-error@-1 {{call to 'clamp' is ambiguous}}
+}
+
+float2 test_clamp_element_type_mismatch(half2 p0, float2 p1) {
+ return clamp(p1, p0, p1);
+ // expected-error@-1 {{call to 'clamp' is ambiguous}}
+}
+
+float2 test_builtin_clamp_float2_splat(float p0, float2 p1) {
+ return __builtin_hlsl_elementwise_clamp(p0, p1, p1);
+ // expected-error@-1 {{all arguments to '__builtin_hlsl_elementwise_clamp' must be vectors}}
+}
+
+float3 test_builtin_clamp_float3_splat(float p0, float3 p1) {
+ return __builtin_hlsl_elementwise_clamp(p0, p1, p1);
+ // expected-error@-1 {{all arguments to '__builtin_hlsl_elementwise_clamp' must be vectors}}
+}
+
+float4 test_builtin_clamp_float4_splat(float p0, float4 p1) {
+ return __builtin_hlsl_elementwise_clamp(p0, p1, p1);
+ // expected-error@-1 {{all arguments to '__builtin_hlsl_elementwise_clamp' must be vectors}}
+}
+
+float2 test_clamp_float2_int_splat(float2 p0, int p1) {
+ return __builtin_hlsl_elementwise_clamp(p0, p1, p1);
+ // expected-error@-1 {{all arguments to '__builtin_hlsl_elementwise_clamp' must be vectors}}
+}
+
+float3 test_clamp_float3_int_splat(float3 p0, int p1) {
+ return __builtin_hlsl_elementwise_clamp(p0, p1, p1);
+ // expected-error@-1 {{all arguments to '__builtin_hlsl_elementwise_clamp' must be vectors}}
+}
+
+float2 test_builtin_clamp_int_vect_to_float_vec_promotion(int2 p0, float p1) {
+ return __builtin_hlsl_elementwise_clamp(p0, p1, p1);
+ // expected-error@-1 {{all arguments to '__builtin_hlsl_elementwise_clamp' must be vectors}}
+}
+
+float test_builtin_clamp_bool_type_promotion(bool p0) {
+ return __builtin_hlsl_elementwise_clamp(p0, p0, p0);
+ // expected-error@-1 {{1st argument must be a vector, integer or floating point type (was 'bool')}}
+}
+
+float builtin_bool_to_float_type_promotion(float p0, bool p1) {
+ return __builtin_hlsl_elementwise_clamp(p0, p0, p1);
+ // expected-error@-1 {{3rd argument must be a floating point type (was 'bool')}}
+}
+
+float builtin_bool_to_float_type_promotion2(bool p0, float p1) {
+ return __builtin_hlsl_elementwise_clamp(p1, p0, p1);
+ // expected-error@-1 {{2nd argument must be a floating point type (was 'bool')}}
+}
+
+float builtin_clamp_int_to_float_promotion(float p0, int p1) {
+ return __builtin_hlsl_elementwise_clamp(p0, p0, p1);
+ // expected-error@-1 {{3rd argument must be a floating point type (was 'int')}}
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td
index 00536c71c3e2e5..c717063f7c8581 100644
--- a/llvm/include/llvm/IR/IntrinsicsDirectX.td
+++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -21,7 +21,7 @@ def int_dx_create_handle : ClangBuiltin<"__builtin_hlsl_create_handle">,
Intrinsic<[ llvm_ptr_ty ], [llvm_i8_ty], [IntrWillReturn]>;
def int_dx_any : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_any_ty]>;
-
+def int_dx_clamp : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>;
def int_dx_dot :
Intrinsic<[LLVMVectorElementType<0>],
[llvm_anyvector_ty, LLVMScalarOrSameVectorWidth<0, LLVMVectorElementType<0>>],
diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index b0e587d2e7eacf..216fa5b10c8f4d 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -274,8 +274,18 @@ def Round : DXILOpMapping<26, unary, int_round,
"Returns the input rounded to the nearest integer"
"within a floating-point type.",
[llvm_halforfloat_ty, LLVMMatchType<0>]>;
+def FMax : DXILOpMapping<35, binary, int_maxnum,
+ "Float maximum. FMax(a,b) = a > b ? a : b">;
+def FMin : DXILOpMapping<36, binary, int_minnum,
+ "Float minimum. FMin(a,b) = a < b ? a : b">;
+def SMax : DXILOpMapping<37, binary, int_smax,
+ "Signed integer maximum. SMax(a,b) = a > b ? a : b">;
+def SMin : DXILOpMapping<38, binary, int_smin,
+ "Signed integer minimum. SMin(a,b) = a < b ? a : b">;
def UMax : DXILOpMapping<39, binary, int_umax,
"Unsigned integer maximum. UMax(a,b) = a > b ? a : b">;
+def UMin : DXILOpMapping<40, binary, int_umin,
+ "Unsigned integer minimum. UMin(a,b) = a < b ? a : b">;
def FMad : DXILOpMapping<46, tertiary, int_fmuladd,
"Floating point arithmetic multiply/add operation. fmad(m,a,b) = m * a + b.">;
def IMad : DXILOpMapping<48, tertiary, int_dx_imad,
diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
index 0461f0490017bf..bd20861ef20d5d 100644
--- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
+++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
@@ -35,6 +35,7 @@ static bool isIntrinsicExpansion(Function &F) {
switch (F.getIntrinsicID()) {
case Intrinsic::exp:
case Intrinsic::dx_any:
+ case Intrinsic::dx_clamp:
case Intrinsic::dx_lerp:
case Intrinsic::dx_rcp:
return true;
@@ -132,12 +133,59 @@ static bool expandRcpIntrinsic(CallInst *Orig) {
return true;
}
+static Intrinsic::IndependentIntrinsics
+getCorrectMaxIntrinsic(Type *elemTy) {
+ if(elemTy->isVectorTy())
+ elemTy = elemTy->getScalarType();
+ if (elemTy->isIntegerTy()) {
+ const llvm::IntegerType* intType = llvm::cast<llvm::IntegerType>(elemTy);
+ if (intType->getSignBit())
+ return Intrinsic::smax;
+ return Intrinsic::umax;
+ }
+ assert(elemTy->isFloatingPointTy());
+ return Intrinsic::maxnum;
+}
+
+static Intrinsic::IndependentIntrinsics
+getCorrectMinIntrinsic(Type *elemTy) {
+ if(elemTy->isVectorTy())
+ elemTy = elemTy->getScalarType();
+ if (elemTy->isIntegerTy()) {
+ const llvm::IntegerType* intType = llvm::cast<llvm::IntegerType>(elemTy);
+ if (intType->getSignBit())
+ return Intrinsic::smin;
+ return Intrinsic::umin;
+ }
+ assert(elemTy->isFloatingPointTy());
+ return Intrinsic::minnum;
+}
+
+static bool expandClampIntrinsic(CallInst *Orig) {
+ Value *X = Orig->getOperand(0);
+ Value *Min = Orig->getOperand(1);
+ Value *Max = Orig->getOperand(2);
+ Type *Ty = X->getType();
+ IRBuilder<> Builder(Orig->getParent());
+ Builder.SetInsertPoint(Orig);
+ auto *MaxCall =
+ Builder.CreateIntrinsic(Ty, getCorrectMaxIntrinsic(Ty), {X, Min}, nullptr, "dx.max");
+ auto *MinCall =
+ Builder.CreateIntrinsic(Ty, getCorrectMinIntrinsic(Ty), {MaxCall, Max}, nullptr, "dx.min");
+
+ Orig->replaceAllUsesWith(MinCall);
+ Orig->eraseFromParent();
+ return true;
+}
+
static bool expandIntrinsic(Function &F, CallInst *Orig) {
switch (F.getIntrinsicID()) {
case Intrinsic::exp:
return expandExpIntrinsic(Orig);
case Intrinsic::dx_any:
return expandAnyIntrinsic(Orig);
+ case Intrinsic::dx_clamp:
+ return expandClampIntrinsic(Orig);
case Intrinsic::dx_lerp:
return expandLerpIntrinsic(Orig);
case Intrinsic::dx_rcp:
diff --git a/llvm/test/CodeGen/DirectX/clamp.ll b/llvm/test/CodeGen/DirectX/clamp.ll
new file mode 100644
index 00000000000000..8c7d845eb00af9
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/clamp.ll
@@ -0,0 +1,64 @@
+; RUN: opt -S -dxil-op-lower < %s | FileCheck %s
+
+; Make sure dxil operation function calls for clamp are generated for i16/i32/i64.
+
+; CHECK-LABEL:test_clamp_i16
+define noundef i16 @test_clamp_i16(i16 noundef %a, i16 noundef %b, i16 noundef %c) {
+entry:
+; CHECK: call i16 @dx.op.binary.i16(i32 37, i16 %{{.*}}, i16 %{{.*}})
+; CHECK: call i16 @dx.op.binary.i16(i32 38, i16 %{{.*}}, i16 %{{.*}})
+ %0 = call i16 @llvm.dx.clamp.i16(i16 %a, i16 %b, i16 %c)
+ ret i16 %0
+}
+
+; CHECK-LABEL:test_clamp_i32
+define noundef i32 @test_clamp_i32(i32 noundef %a, i32 noundef %b, i32 noundef %c) {
+entry:
+; CHECK: call i32 @dx.op.binary.i32(i32 37, i32 %{{.*}}, i32 %{{.*}})
+; CHECK: call i32 @dx.op.binary.i32(i32 38, i32 %{{.*}}, i32 %{{.*}})
+ %0 = call i32 @llvm.dx.clamp.i32(i32 %a, i32 %b, i32 %c)
+ ret i32 %0
+}
+
+; CHECK-LABEL:test_clamp_i64
+define noundef i64 @test_clamp_i64(i64 noundef %a, i64 noundef %b, i64 noundef %c) {
+entry:
+; CHECK: call i64 @dx.op.binary.i64(i32 37, i64 %a, i64 %b)
+; CHECK: call i64 @dx.op.binary.i64(i32 38, i64 %{{.*}}, i64 %c)
+ %0 = call i64 @llvm.dx.clamp.i64(i64 %a, i64 %b, i64 %c)
+ ret i64 %0
+}
+
+; CHECK-LABEL:test_clamp_half
+define noundef half @test_clamp_half(half noundef %a, half noundef %b, half noundef %c) {
+entry:
+; CHECK: call half @dx.op.binary.f16(i32 35, half %{{.*}}, half %{{.*}})
+; CHECK: call half @dx.op.binary.f16(i32 36, half %{{.*}}, half %{{.*}})
+ %0 = call half @llvm.dx.clamp.f16(half %a, half %b, half %c)
+ ret half %0
+}
+
+; CHECK-LABEL:test_clamp_float
+define noundef float @test_clamp_float(float noundef %a, float noundef %b, float noundef %c) {
+entry:
+; CHECK: call float @dx.op.binary.f32(i32 35, float %{{.*}}, float %{{.*}})
+; CHECK: call float @dx.op.binary.f32(i32 36, float %{{.*}}, float %{{.*}})
+ %0 = call float @llvm.dx.clamp.f32(float %a, float %b, float %c)
+ ret float %0
+}
+
+; CHECK-LABEL:test_clamp_double
+define noundef double @test_clamp_double(double noundef %a, double noundef %b, double noundef %c) {
+entry:
+; CHECK: call double @dx.op.binary.f64(i32 35, double %{{.*}}, double %{{.*}})
+; CHECK: call double @dx.op.binary.f64(i32 36, double %{{.*}}, double %{{.*}})
+ %0 = call double @llvm.dx.clamp.f64(double %a, double %b, double %c)
+ ret double %0
+}
+
+declare half @llvm.dx.clamp.f16(half, half, half)
+declare float @llvm.dx.clamp.f32(float, float, float)
+declare double @llvm.dx.clamp.f64(double, double, double)
+declare i16 @llvm.dx.clamp.i16(i16, i16, i16)
+declare i32 @llvm.dx.clamp.i32(i32, i32, i32)
+declare i64 @llvm.dx.clamp.i64(i64, i64, i64)
diff --git a/llvm/test/CodeGen/DirectX/fmax.ll b/llvm/test/CodeGen/DirectX/fmax.ll
new file mode 100644
index 00000000000000..aff722c29309c0
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/fmax.ll
@@ -0,0 +1,31 @@
+; RUN: opt -S -dxil-op-lower < %s | FileCheck %s
+
+; Make sure dxil operation function calls for fmax are generated for half/float/double.
+
+; CHECK-LABEL:test_fmax_half
+define noundef half @test_fmax_half(half noundef %a, half noundef %b) {
+entry:
+; CHECK: call half @dx.op.binary.f16(i32 35, half %{{.*}}, half %{{.*}})
+ %0 = call half @llvm.maxnum.f16(half %a, half %b)
+ ret half %0
+}
+
+; CHECK-LABEL:test_fmax_float
+define noundef float @test_fmax_float(float noundef %a, float noundef %b) {
+entry:
+; CHECK: call float @dx.op.binary.f32(i32 35, float %{{.*}}, float %{{.*}})
+ %0 = call float @llvm.maxnum.f32(float %a, float %b)
+ ret float %0
+}
+
+; CHECK-LABEL:test_fmax_double
+define noundef double @test_fmax_double(double noundef %a, double noundef %b) {
+entry:
+; CHECK: call double @dx.op.binary.f64(i32 35, double %{{.*}}, double %{{.*}})
+ %0 = call double @llvm.maxnum.f64(double %a, double %b)
+ ret double %0
+}
+
+declare half @llvm.maxnum.f16(half, half)
+declare float @llvm.maxnum.f32(float, float)
+declare double @llvm.maxnum.f64(double, double)
diff --git a/llvm/test/CodeGen/DirectX/fmin.ll b/llvm/test/CodeGen/DirectX/fmin.ll
new file mode 100644
index 00000000000000..2f7c209f0278ae
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/fmin.ll
@@ -0,0 +1,31 @@
+; RUN: opt -S -dxil-op-lower < %s | FileCheck %s
+
+; Make sure dxil operation function calls for fmin are generated for half/float/double.
+
+; CHECK-LABEL:test_fmin_half
+define noundef half @test_fmin_half(half noundef %a, half noundef %b) {
+entry:
+; CHECK: call half @dx.op.binary.f16(i32 36, half %{{.*}}, half %{{.*}})
+ %0 = call half @llvm.minnum.f16(half %a, half %b)
+ ret half %0
+}
+
+; CHECK-LABEL:test_fmin_float
+define noundef float @test_fmin_float(float noundef %a, float noundef %b) {
+entry:
+; CHECK: call float @dx.op.binary.f32(i32 36, float %{{.*}}, float %{{.*}})
+ %0 = call float @llvm.minnum.f32(float %a, float %b)
+ ret float %0
+}
+
+; CHECK-LABEL:test_fmin_double
+define noundef double @test_fmin_double(double noundef %a, double noundef %b) {
+entry:
+; CHECK: call double @dx.op.binary.f64(i32 36, double %{{.*}}, double %{{.*}})
+ %0 = call double @llvm.minnum.f64(double %a, double %b)
+ ret double %0
+}
+
+declare half @llvm.minnum.f16(half, half)
+declare float @llvm.minnum.f32(float, float)
+declare double @llvm.minnum.f64(double, double)
diff --git a/llvm/test/CodeGen/DirectX/smax.ll b/llvm/test/CodeGen/DirectX/smax.ll
new file mode 100644
index 00000000000000..8b2406782c0938
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/smax.ll
@@ -0,0 +1,31 @@
+; RUN: opt -S -dxil-op-lower < %s | FileCheck %s
+
+; Make sure dxil operation function calls for smax are generated for i16/i32/i64.
+
+; CHECK-LABEL:test_smax_i16
+define noundef i16 @test_smax_i16(i16 noundef %a, i16 noundef %b) {
+entry:
+; CHECK: call i16 @dx.op.binary.i16(i32 37, i16 %{{.*}}, i16 %{{.*}})
+ %0 = call i16 @llvm.smax.i16(i16 %a, i16 %b)
+ ret i16 %0
+}
+
+; CHECK-LABEL:test_smax_i32
+define noundef i32 @test_smax_i32(i32 noundef %a, i32 noundef %b) {
+entry:
+; CHECK: call i32 @dx.op.binary.i32(i32 37, i32 %{{.*}}, i32 %{{.*}})
+ %0 = call i32 @llvm.smax.i32(i32 %a, i32 %b)
+ ret i32 %0
+}
+
+; CHECK-LABEL:test_smax_i64
+define noundef i64 @test_smax_i64(i64 noundef %a, i64 noundef %b) {
+entry:
+; CHECK: call i64 @dx.op.binary.i64(i32 37, i64 %{{.*}}, i64 %{{.*}})
+ %0 = call i64 @llvm.smax.i64(i64 %a, i64 %b)
+ ret i64 %0
+}
+
+declare i16 @llvm.smax.i16(i16, i16)
+declare i32 @llvm.smax.i32(i32, i32)
+declare i64 @llvm.smax.i64(i64, i64)
diff --git a/llvm/test/CodeGen/DirectX/smin.ll b/llvm/test/CodeGen/DirectX/smin.ll
new file mode 100644
index 00000000000000..b2b40a1b624335
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/smin.ll
@@ -0,0 +1,31 @@
+; RUN: opt -S -dxil-op-lower < %s | FileCheck %s
+
+; Make sure dxil operation function calls for smin are generated for i16/i32/i64.
+
+; CHECK-LABEL:test_smin_i16
+define noundef i16 @test_smin_i16(i16 noundef %a, i16 noundef %b) {
+entry:
+; CHECK: call i16 @dx.op.binary.i16(i32 38, i16 %{{.*}}, i16 %{{.*}})
+ %0 = call i16 @llvm.smin.i16(i16 %a, i16 %b)
+ ret i16 %0
+}
+
+; CHECK-LABEL:test_smin_i32
+define noundef i32 @test_smin_i32(i32 noundef %a, i32 noundef %b) {
+entry:
+; CHECK: call i32 @dx.op.binary.i32(i32 38, i32 %{{.*}}, i32 %{{.*}})
+ %0 = call i32 @llvm.smin.i32(i32 %a, i32 %b)
+ ret i32 %0
+}
+
+; CHECK-LABEL:test_smin_i64
+define noundef i64 @test_smin_i64(i64 noundef %a, i64 noundef %b) {
+entry:
+; CHECK: call i64 @dx.op.binary.i64(i32 38, i64 %{{.*}}, i64 %{{.*}})
+ %0 = call i64 @llvm.smin.i64(i64 %a, i64 %b)
+ ret i64 %0
+}
+
+declare i16 @llvm.smin.i16(i16, i16)
+declare i32 @llvm.smin.i32(i32, i32)
+declare i64 @llvm.smin.i64(i64, i64)
diff --git a/llvm/test/CodeGen/DirectX/umax.ll b/llvm/test/CodeGen/DirectX/umax.ll
index c7b6a87599279a..be0f557fc8da69 100644
--- a/llvm/test/CodeGen/DirectX/umax.ll
+++ b/llvm/test/CodeGen/DirectX/umax.ll
@@ -1,30 +1,31 @@
; RUN: opt -S -dxil-op-lower < %s | FileCheck %s
-; Make sure dxil operation function calls for umax are generated for i32/i64.
+; Make sure dxil operation function calls for umax are generated for i16/i32/i64.
-target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64"
-target triple = "dxil-pc-shadermodel6.7-library"
+; CHECK-LABEL:test_umax_i16
+define noundef i16 @test_umax_i16(i16 noundef %a, i16 noundef %b) {
+entry:
+; CHECK: call i16 @dx.op.binary.i16(i32 39, i16 %{{.*}}, i16 %{{.*}})
+ %0 = call i16 @llvm.umax.i16(i16 %a, i16 %b)
+ ret i16 %0
+}
; CHECK-LABEL:test_umax_i32
-; Function Attrs: noinline nounwind optnone
-define noundef i32 @test_umax_i32(i32 noundef %a, i32 noundef %b) #0 {
+define noundef i32 @test_umax_i32(i32 noundef %a, i32 noundef %b) {
entry:
-; CHECK:call i32 @dx.op.binary.i32(i32 39, i32 %{{.*}}, i32 %{{.*}})
+; CHECK: call i32 @dx.op.binary.i32(i32 39, i32 %{{.*}}, i32 %{{.*}})
%0 = call i32 @llvm.umax.i32(i32 %a, i32 %b)
ret i32 %0
}
; CHECK-LABEL:test_umax_i64
-define noundef i64 @test_umax_i64(i64 noundef %a, i64 noundef %b) #0 {
+define noundef i64 @test_umax_i64(i64 noundef %a, i64 noundef %b) {
entry:
-; CHECK:call i64 @dx.op.binary.i64(i32 39, i64 %{{.*}}, i64 %{{.*}})
+; CHECK: call i64 @dx.op.binary.i64(i32 39, i64 %{{.*}}, i64 %{{.*}})
%0 = call i64 @llvm.umax.i64(i64 %a, i64 %b)
ret i64 %0
}
-; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn
-declare i32 @llvm.umax.i32(i32, i32) #1
-declare i64 @llvm.umax.i64(i64, i64) #1
-
-attributes #0 = { noinline nounwind }
-attributes #1 = { nocallback nofree nosync nounwind readnone speculatable willreturn }
+declare i16 @llvm.umax.i16(i16, i16)
+declare i32 @llvm.umax.i32(i32, i32)
+declare i64 @llvm.umax.i64(i64, i64)
diff --git a/llvm/test/CodeGen/DirectX/umin.ll b/llvm/test/CodeGen/DirectX/umin.ll
new file mode 100644
index 00000000000000..5051c711744892
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/umin.ll
@@ -0,0 +1,31 @@
+; RUN: opt -S -dxil-op-lower < %s | FileCheck %s
+
+; Make sure dxil operation function calls for umin are generated for i16/i32/i64.
+
+; CHECK-LABEL:test_umin_i16
+define noundef i16 @test_umin_i16(i16 noundef %a, i16 noundef %b) {
+entry:
+; CHECK: call i16 @dx.op.binary.i16(i32 40, i16 %{{.*}}, i16 %{{.*}})
+ %0 = call i16 @llvm.umin.i16(i16 %a, i16 %b)
+ ret i16 %0
+}
+
+; CHECK-LABEL:test_umin_i32
+define noundef i32 @test_umin_i32(i32 noundef %a, i32 noundef %b) {
+entry:
+; CHECK: call i32 @dx.op.binary.i32(i32 40, i32 %{{.*}}, i32 %{{.*}})
+ %0 = call i32 @llvm.umin.i32(i32 %a, i32 %b)
+ ret i32 %0
+}
+
+; CHECK-LABEL:test_umin_i64
+define noundef i64 @test_umin_i64(i64 noundef %a, i64 noundef %b) {
+entry:
+; CHECK: call i64 @dx.op.binary.i64(i32 40, i64 %{{.*}}, i64 %{{.*}})
+ %0 = call i64 @llvm.umin.i64(i64 %a, i64 %b)
+ ret i64 %0
+}
+
+declare i16 @llvm.umin.i16(i16, i16)
+declare i32 @llvm.umin.i32(i32, i32)
+declare i64 @llvm.umin.i64(i64, i64)
From afe3e6dce778ae440d1ec51753e4982d4ab9b60b Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <farzonlotfi at microsoft.com>
Date: Fri, 15 Mar 2024 02:30:54 -0400
Subject: [PATCH 2/2] [HLSL] implement `clamp` intrinsic closes #70071 -
`CGBuiltin.cpp` - Add the unsigned/generic clamp intrinsic emitter. -
`IntrinsicsDirectX.td` - add the `dx.clamp` & `dx.uclamp` intrinsics -
`DXILIntrinsicExpansion.cpp` - add the `clamp` instruction expansion while
maintaining vector form. - `SemaChecking.cpp` - Add `clamp` builtin Sema
Checks. - `Builtins.td` - add a `clamp` builtin - `hlsl_intrinsics.h` - add
the `clamp` api Why `clamp` as instruction expansion for DXIL? 1. SPIR-V has
a GLSL `clamp` extension via: -
[FClamp](https://registry.khronos.org/SPIR-V/specs/1.0/GLSL.std.450.html#FClamp)
-
[UClamp](https://registry.khronos.org/SPIR-V/specs/1.0/GLSL.std.450.html#UClamp)
-
[SClamp](https://registry.khronos.org/SPIR-V/specs/1.0/GLSL.std.450.html#SClamp)
2. Further Clamp lowers to `min(max( x, min_range ), max_range)` which we
have float, signed, and unsigned DXIL ops for
Commit-specific changes:
- Support calling the unsigned DXIL ops.
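As a rough illustration of the unsigned path added in this commit (values are illustrative; it mirrors the `clamp-vec.ll` checks below):

```llvm
; clang now emits dx.uclamp when the element type is unsigned...
%dx.clamp = call <4 x i32> @llvm.dx.uclamp.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c)

; ...and the expansion pass rewrites it with the unsigned max/min intrinsics,
; which -dxil-op-lower later maps to the UMax (39) and UMin (40) DXIL ops.
%dx.max = call <4 x i32> @llvm.umax.v4i32(<4 x i32> %a, <4 x i32> %b)
%dx.min = call <4 x i32> @llvm.umin.v4i32(<4 x i32> %dx.max, <4 x i32> %c)
```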
---
clang/lib/CodeGen/CGBuiltin.cpp | 9 ++-
clang/test/CodeGenHLSL/builtins/clamp.hlsl | 24 +++---
clang/test/SemaHLSL/BuiltIns/mad-errors.hlsl | 6 +-
llvm/include/llvm/IR/IntrinsicsDirectX.td | 3 +-
.../Target/DirectX/DXILIntrinsicExpansion.cpp | 48 ++++++------
llvm/test/CodeGen/DirectX/clamp-vec.ll | 74 +++++++++++++++++++
llvm/test/CodeGen/DirectX/clamp.ll | 32 +++++++-
7 files changed, 155 insertions(+), 41 deletions(-)
create mode 100644 llvm/test/CodeGen/DirectX/clamp-vec.ll
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index f831694fe9bc23..77fb1bc0f4eb3a 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -17985,8 +17985,15 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
Value *OpX = EmitScalarExpr(E->getArg(0));
Value *OpMin = EmitScalarExpr(E->getArg(1));
Value *OpMax = EmitScalarExpr(E->getArg(2));
+
+ QualType Ty = E->getArg(0)->getType();
+ bool IsUnsigned = false;
+ if (auto *VecTy = Ty->getAs<VectorType>())
+ Ty = VecTy->getElementType();
+ IsUnsigned = Ty->isUnsignedIntegerType();
return Builder.CreateIntrinsic(
- /*ReturnType=*/OpX->getType(), Intrinsic::dx_clamp,
+ /*ReturnType=*/OpX->getType(),
+ IsUnsigned ? Intrinsic::dx_uclamp : Intrinsic::dx_clamp,
ArrayRef<Value *>{OpX, OpMin, OpMax}, nullptr, "dx.clamp");
}
case Builtin::BI__builtin_hlsl_dot: {
diff --git a/clang/test/CodeGenHLSL/builtins/clamp.hlsl b/clang/test/CodeGenHLSL/builtins/clamp.hlsl
index 3506beb2bd1faa..029e48ffe25865 100644
--- a/clang/test/CodeGenHLSL/builtins/clamp.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/clamp.hlsl
@@ -21,16 +21,16 @@ int16_t3 test_clamp_short3(int16_t3 p0, int16_t3 p1) { return clamp(p0, p1,p1);
int16_t4 test_clamp_short4(int16_t4 p0, int16_t4 p1) { return clamp(p0, p1,p1); }
// NATIVE_HALF: define noundef i16 @
-// NATIVE_HALF: call i16 @llvm.dx.clamp.i16(
+// NATIVE_HALF: call i16 @llvm.dx.uclamp.i16(
uint16_t test_clamp_ushort(uint16_t p0, uint16_t p1) { return clamp(p0, p1,p1); }
// NATIVE_HALF: define noundef <2 x i16> @
-// NATIVE_HALF: call <2 x i16> @llvm.dx.clamp.v2i16
+// NATIVE_HALF: call <2 x i16> @llvm.dx.uclamp.v2i16
uint16_t2 test_clamp_ushort2(uint16_t2 p0, uint16_t2 p1) { return clamp(p0, p1,p1); }
// NATIVE_HALF: define noundef <3 x i16> @
-// NATIVE_HALF: call <3 x i16> @llvm.dx.clamp.v3i16
+// NATIVE_HALF: call <3 x i16> @llvm.dx.uclamp.v3i16
uint16_t3 test_clamp_ushort3(uint16_t3 p0, uint16_t3 p1) { return clamp(p0, p1,p1); }
// NATIVE_HALF: define noundef <4 x i16> @
-// NATIVE_HALF: call <4 x i16> @llvm.dx.clamp.v4i16
+// NATIVE_HALF: call <4 x i16> @llvm.dx.uclamp.v4i16
uint16_t4 test_clamp_ushort4(uint16_t4 p0, uint16_t4 p1) { return clamp(p0, p1,p1); }
#endif
@@ -48,16 +48,16 @@ int3 test_clamp_int3(int3 p0, int3 p1) { return clamp(p0, p1,p1); }
int4 test_clamp_int4(int4 p0, int4 p1) { return clamp(p0, p1,p1); }
// CHECK: define noundef i32 @
-// CHECK: call i32 @llvm.dx.clamp.i32(
+// CHECK: call i32 @llvm.dx.uclamp.i32(
int test_clamp_uint(uint p0, uint p1) { return clamp(p0, p1,p1); }
// CHECK: define noundef <2 x i32> @
-// CHECK: call <2 x i32> @llvm.dx.clamp.v2i32
+// CHECK: call <2 x i32> @llvm.dx.uclamp.v2i32
uint2 test_clamp_uint2(uint2 p0, uint2 p1) { return clamp(p0, p1,p1); }
// CHECK: define noundef <3 x i32> @
-// CHECK: call <3 x i32> @llvm.dx.clamp.v3i32
+// CHECK: call <3 x i32> @llvm.dx.uclamp.v3i32
uint3 test_clamp_uint3(uint3 p0, uint3 p1) { return clamp(p0, p1,p1); }
// CHECK: define noundef <4 x i32> @
-// CHECK: call <4 x i32> @llvm.dx.clamp.v4i32
+// CHECK: call <4 x i32> @llvm.dx.uclamp.v4i32
uint4 test_clamp_uint4(uint4 p0, uint4 p1) { return clamp(p0, p1,p1); }
// CHECK: define noundef i64 @
@@ -74,16 +74,16 @@ int64_t3 test_clamp_long3(int64_t3 p0, int64_t3 p1) { return clamp(p0, p1,p1); }
int64_t4 test_clamp_long4(int64_t4 p0, int64_t4 p1) { return clamp(p0, p1,p1); }
// CHECK: define noundef i64 @
-// CHECK: call i64 @llvm.dx.clamp.i64(
+// CHECK: call i64 @llvm.dx.uclamp.i64(
uint64_t test_clamp_long(uint64_t p0, uint64_t p1) { return clamp(p0, p1,p1); }
// CHECK: define noundef <2 x i64> @
-// CHECK: call <2 x i64> @llvm.dx.clamp.v2i64
+// CHECK: call <2 x i64> @llvm.dx.uclamp.v2i64
uint64_t2 test_clamp_long2(uint64_t2 p0, uint64_t2 p1) { return clamp(p0, p1,p1); }
// CHECK: define noundef <3 x i64> @
-// CHECK: call <3 x i64> @llvm.dx.clamp.v3i64
+// CHECK: call <3 x i64> @llvm.dx.uclamp.v3i64
uint64_t3 test_clamp_long3(uint64_t3 p0, uint64_t3 p1) { return clamp(p0, p1,p1); }
// CHECK: define noundef <4 x i64> @
-// CHECK: call <4 x i64> @llvm.dx.clamp.v4i64
+// CHECK: call <4 x i64> @llvm.dx.uclamp.v4i64
uint64_t4 test_clamp_long4(uint64_t4 p0, uint64_t4 p1) { return clamp(p0, p1,p1); }
// NATIVE_HALF: define noundef half @
diff --git a/clang/test/SemaHLSL/BuiltIns/mad-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/mad-errors.hlsl
index 0b6843591455bd..97ce931bf1b5b5 100644
--- a/clang/test/SemaHLSL/BuiltIns/mad-errors.hlsl
+++ b/clang/test/SemaHLSL/BuiltIns/mad-errors.hlsl
@@ -72,15 +72,15 @@ float2 test_builtin_mad_int_vect_to_float_vec_promotion(int2 p0, float p1) {
float builtin_bool_to_float_type_promotion(float p0, bool p1) {
return __builtin_hlsl_mad(p0, p0, p1);
- // expected-error@-1 {{3rd argument must be a vector, integer or floating point type (was 'bool')}}
+ // expected-error@-1 {{3rd argument must be a floating point type (was 'bool')}}
}
float builtin_bool_to_float_type_promotion2(bool p0, float p1) {
return __builtin_hlsl_mad(p1, p0, p1);
- // expected-error@-1 {{2nd argument must be a vector, integer or floating point type (was 'bool')}}
+ // expected-error@-1 {{2nd argument must be a floating point type (was 'bool')}}
}
float builtin_mad_int_to_float_promotion(float p0, int p1) {
return __builtin_hlsl_mad(p0, p0, p1);
- // expected-error@-1 {{arguments are of different types ('double' vs 'int')}}
+ // expected-error@-1 {{3rd argument must be a floating point type (was 'int')}}
}
diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td
index c717063f7c8581..5c72f06f96ed12 100644
--- a/llvm/include/llvm/IR/IntrinsicsDirectX.td
+++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -21,7 +21,8 @@ def int_dx_create_handle : ClangBuiltin<"__builtin_hlsl_create_handle">,
Intrinsic<[ llvm_ptr_ty ], [llvm_i8_ty], [IntrWillReturn]>;
def int_dx_any : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_any_ty]>;
-def int_dx_clamp : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>;
+def int_dx_clamp : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>;
+def int_dx_uclamp : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>;
def int_dx_dot :
Intrinsic<[LLVMVectorElementType<0>],
[llvm_anyvector_ty, LLVMScalarOrSameVectorWidth<0, LLVMVectorElementType<0>>],
diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
index bd20861ef20d5d..441111a7ce7ca9 100644
--- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
+++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
@@ -36,6 +36,7 @@ static bool isIntrinsicExpansion(Function &F) {
case Intrinsic::exp:
case Intrinsic::dx_any:
case Intrinsic::dx_clamp:
+ case Intrinsic::dx_uclamp:
case Intrinsic::dx_lerp:
case Intrinsic::dx_rcp:
return true;
@@ -133,35 +134,33 @@ static bool expandRcpIntrinsic(CallInst *Orig) {
return true;
}
-static Intrinsic::IndependentIntrinsics
-getCorrectMaxIntrinsic(Type *elemTy) {
- if(elemTy->isVectorTy())
- elemTy = elemTy->getScalarType();
- if (elemTy->isIntegerTy()) {
- const llvm::IntegerType* intType = llvm::cast<llvm::IntegerType>(elemTy);
- if (intType->getSignBit())
- return Intrinsic::smax;
+static Intrinsic::ID getCorrectMaxIntrinsic(Type *elemTy,
+ Intrinsic::ID clampIntrinsic) {
+ if (clampIntrinsic == Intrinsic::dx_uclamp)
return Intrinsic::umax;
- }
+ assert(clampIntrinsic == Intrinsic::dx_clamp);
+ if (elemTy->isVectorTy())
+ elemTy = elemTy->getScalarType();
+ if (elemTy->isIntegerTy())
+ return Intrinsic::smax;
assert(elemTy->isFloatingPointTy());
return Intrinsic::maxnum;
}
-static Intrinsic::IndependentIntrinsics
-getCorrectMinIntrinsic(Type *elemTy) {
- if(elemTy->isVectorTy())
- elemTy = elemTy->getScalarType();
- if (elemTy->isIntegerTy()) {
- const llvm::IntegerType* intType = llvm::cast<llvm::IntegerType>(elemTy);
- if (intType->getSignBit())
- return Intrinsic::smin;
+static Intrinsic::ID getCorrectMinIntrinsic(Type *elemTy,
+ Intrinsic::ID clampIntrinsic) {
+ if (clampIntrinsic == Intrinsic::dx_uclamp)
return Intrinsic::umin;
- }
+ assert(clampIntrinsic == Intrinsic::dx_clamp);
+ if (elemTy->isVectorTy())
+ elemTy = elemTy->getScalarType();
+ if (elemTy->isIntegerTy())
+ return Intrinsic::smin;
assert(elemTy->isFloatingPointTy());
return Intrinsic::minnum;
}
-static bool expandClampIntrinsic(CallInst *Orig) {
+static bool expandClampIntrinsic(CallInst *Orig, Intrinsic::ID clampIntrinsic) {
Value *X = Orig->getOperand(0);
Value *Min = Orig->getOperand(1);
Value *Max = Orig->getOperand(2);
@@ -169,10 +168,12 @@ static bool expandClampIntrinsic(CallInst *Orig) {
IRBuilder<> Builder(Orig->getParent());
Builder.SetInsertPoint(Orig);
auto *MaxCall =
- Builder.CreateIntrinsic(Ty, getCorrectMaxIntrinsic(Ty), {X, Min}, nullptr, "dx.max");
+ Builder.CreateIntrinsic(Ty, getCorrectMaxIntrinsic(Ty, clampIntrinsic),
+ {X, Min}, nullptr, "dx.max");
auto *MinCall =
- Builder.CreateIntrinsic(Ty, getCorrectMinIntrinsic(Ty), {MaxCall, Max}, nullptr, "dx.min");
-
+ Builder.CreateIntrinsic(Ty, getCorrectMinIntrinsic(Ty, clampIntrinsic),
+ {MaxCall, Max}, nullptr, "dx.min");
+
Orig->replaceAllUsesWith(MinCall);
Orig->eraseFromParent();
return true;
@@ -184,8 +185,9 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) {
return expandExpIntrinsic(Orig);
case Intrinsic::dx_any:
return expandAnyIntrinsic(Orig);
+ case Intrinsic::dx_uclamp:
case Intrinsic::dx_clamp:
- return expandClampIntrinsic(Orig);
+ return expandClampIntrinsic(Orig, F.getIntrinsicID());
case Intrinsic::dx_lerp:
return expandLerpIntrinsic(Orig);
case Intrinsic::dx_rcp:
diff --git a/llvm/test/CodeGen/DirectX/clamp-vec.ll b/llvm/test/CodeGen/DirectX/clamp-vec.ll
new file mode 100644
index 00000000000000..d4f33a18b71573
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/clamp-vec.ll
@@ -0,0 +1,74 @@
+; RUN: opt -S -dxil-intrinsic-expansion < %s | FileCheck %s
+
+; Make sure clamp intrinsics are expanded to the matching max/min intrinsics for float/int/uint vectors.
+
+; CHECK-LABEL: clamp_half3
+define noundef <3 x half> @clamp_half3(<3 x half> noundef %a, <3 x half> noundef %b, <3 x half> noundef %c) {
+entry:
+ ; CHECK: call <3 x half> @llvm.maxnum.v3f16(<3 x half> %a, <3 x half> %b)
+ ; CHECK: call <3 x half> @llvm.minnum.v3f16(<3 x half> %{{.*}}, <3 x half> %c)
+ %dx.clamp = call <3 x half> @llvm.dx.clamp.v3f16(<3 x half> %a, <3 x half> %b, <3 x half> %c)
+ ret <3 x half> %dx.clamp
+}
+
+; CHECK-LABEL: clamp_float4
+define noundef <4 x float> @clamp_float4(<4 x float> noundef %a, <4 x float> noundef %b, <4 x float> noundef %c) {
+entry:
+ ; CHECK: call <4 x float> @llvm.maxnum.v4f32(<4 x float> %a, <4 x float> %b)
+ ; CHECK: call <4 x float> @llvm.minnum.v4f32(<4 x float> %{{.*}}, <4 x float> %c)
+ %dx.clamp = call <4 x float> @llvm.dx.clamp.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c)
+ ret <4 x float> %dx.clamp
+}
+
+; CHECK-LABEL: clamp_double2
+define noundef <2 x double> @clamp_double2(<2 x double> noundef %a, <2 x double> noundef %b, <2 x double> noundef %c) {
+entry:
+ ; CHECK: call <2 x double> @llvm.maxnum.v2f64(<2 x double> %a, <2 x double> %b)
+ ; CHECK: call <2 x double> @llvm.minnum.v2f64(<2 x double> %{{.*}}, <2 x double> %c)
+ %dx.clamp = call <2 x double> @llvm.dx.clamp.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c)
+ ret <2 x double> %dx.clamp
+}
+
+; CHECK-LABEL: clamp_int4
+define noundef <4 x i32> @clamp_int4(<4 x i32> noundef %a, <4 x i32> noundef %b, <4 x i32> noundef %c) {
+entry:
+ ; CHECK: call <4 x i32> @llvm.smax.v4i32(<4 x i32> %a, <4 x i32> %b)
+ ; CHECK: call <4 x i32> @llvm.smin.v4i32(<4 x i32> %{{.*}}, <4 x i32> %c)
+ %dx.clamp = call <4 x i32> @llvm.dx.clamp.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c)
+ ret <4 x i32> %dx.clamp
+}
+
+; CHECK-LABEL: clamp_uint16_t3
+define noundef <3 x i16> @clamp_uint16_t3(<3 x i16> noundef %a, <3 x i16> noundef %b, <3 x i16> noundef %c) {
+entry:
+ ; CHECK: call <3 x i16> @llvm.umax.v3i16(<3 x i16> %a, <3 x i16> %b)
+ ; CHECK: call <3 x i16> @llvm.umin.v3i16(<3 x i16> %{{.*}}, <3 x i16> %c)
+ %dx.clamp = call <3 x i16> @llvm.dx.uclamp.v3i16(<3 x i16> %a, <3 x i16> %b, <3 x i16> %c)
+ ret <3 x i16> %dx.clamp
+}
+
+; CHECK-LABEL: clamp_uint4
+define noundef <4 x i32> @clamp_uint4(<4 x i32> noundef %a, <4 x i32> noundef %b, <4 x i32> noundef %c) {
+entry:
+ ; CHECK: call <4 x i32> @llvm.umax.v4i32(<4 x i32> %a, <4 x i32> %b)
+ ; CHECK: call <4 x i32> @llvm.umin.v4i32(<4 x i32> %{{.*}}, <4 x i32> %c)
+ %dx.clamp = call <4 x i32> @llvm.dx.uclamp.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c)
+ ret <4 x i32> %dx.clamp
+}
+
+; CHECK-LABEL: clamp_uint64_t4
+define noundef <2 x i64> @clamp_uint64_t4(<2 x i64> noundef %a, <2 x i64> noundef %b, <2 x i64> noundef %c) {
+entry:
+ ; CHECK: call <2 x i64> @llvm.umax.v2i64(<2 x i64> %a, <2 x i64> %b)
+ ; CHECK: call <2 x i64> @llvm.umin.v2i64(<2 x i64> %{{.*}}, <2 x i64> %c)
+ %dx.clamp = call <2 x i64> @llvm.dx.uclamp.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c)
+ ret <2 x i64> %dx.clamp
+}
+
+declare <3 x half> @llvm.dx.clamp.v3f16(<3 x half>, <3 x half>, <3 x half>)
+declare <4 x float> @llvm.dx.clamp.v4f32(<4 x float>, <4 x float>, <4 x float>)
+declare <2 x double> @llvm.dx.clamp.v2f64(<2 x double>, <2 x double>, <2 x double>)
+declare <4 x i32> @llvm.dx.clamp.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
+declare <3 x i16> @llvm.dx.uclamp.v3i16(<3 x i16>, <3 x i16>, <3 x i16>)
+declare <4 x i32> @llvm.dx.uclamp.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
+declare <2 x i64> @llvm.dx.uclamp.v2i64(<2 x i64>, <2 x i64>, <2 x i64>)
diff --git a/llvm/test/CodeGen/DirectX/clamp.ll b/llvm/test/CodeGen/DirectX/clamp.ll
index 8c7d845eb00af9..f122313b8d7dcc 100644
--- a/llvm/test/CodeGen/DirectX/clamp.ll
+++ b/llvm/test/CodeGen/DirectX/clamp.ll
@@ -1,6 +1,6 @@
; RUN: opt -S -dxil-op-lower < %s | FileCheck %s
-; Make sure dxil operation function calls for clamp are generated for i16/i32/i64.
+; Make sure dxil operation function calls for clamp/uclamp are generated for half/float/double/i16/i32/i64.
; CHECK-LABEL:test_clamp_i16
define noundef i16 @test_clamp_i16(i16 noundef %a, i16 noundef %b, i16 noundef %c) {
@@ -56,9 +56,39 @@ entry:
ret double %0
}
+; CHECK-LABEL:test_uclamp_i16
+define noundef i16 @test_uclamp_i16(i16 noundef %a, i16 noundef %b, i16 noundef %c) {
+entry:
+; CHECK: call i16 @dx.op.binary.i16(i32 39, i16 %{{.*}}, i16 %{{.*}})
+; CHECK: call i16 @dx.op.binary.i16(i32 40, i16 %{{.*}}, i16 %{{.*}})
+ %0 = call i16 @llvm.dx.uclamp.i16(i16 %a, i16 %b, i16 %c)
+ ret i16 %0
+}
+
+; CHECK-LABEL:test_uclamp_i32
+define noundef i32 @test_uclamp_i32(i32 noundef %a, i32 noundef %b, i32 noundef %c) {
+entry:
+; CHECK: call i32 @dx.op.binary.i32(i32 39, i32 %{{.*}}, i32 %{{.*}})
+; CHECK: call i32 @dx.op.binary.i32(i32 40, i32 %{{.*}}, i32 %{{.*}})
+ %0 = call i32 @llvm.dx.uclamp.i32(i32 %a, i32 %b, i32 %c)
+ ret i32 %0
+}
+
+; CHECK-LABEL:test_uclamp_i64
+define noundef i64 @test_uclamp_i64(i64 noundef %a, i64 noundef %b, i64 noundef %c) {
+entry:
+; CHECK: call i64 @dx.op.binary.i64(i32 39, i64 %a, i64 %b)
+; CHECK: call i64 @dx.op.binary.i64(i32 40, i64 %{{.*}}, i64 %c)
+ %0 = call i64 @llvm.dx.uclamp.i64(i64 %a, i64 %b, i64 %c)
+ ret i64 %0
+}
+
declare half @llvm.dx.clamp.f16(half, half, half)
declare float @llvm.dx.clamp.f32(float, float, float)
declare double @llvm.dx.clamp.f64(double, double, double)
declare i16 @llvm.dx.clamp.i16(i16, i16, i16)
declare i32 @llvm.dx.clamp.i32(i32, i32, i32)
declare i64 @llvm.dx.clamp.i64(i64, i64, i64)
+declare i16 @llvm.dx.uclamp.i16(i16, i16, i16)
+declare i32 @llvm.dx.uclamp.i32(i32, i32, i32)
+declare i64 @llvm.dx.uclamp.i64(i64, i64, i64)