[clang] [llvm] [HLSL] Implement support for HLSL intrinsic - saturate (PR #104619)

Fri Aug 16 10:24:40 PDT 2024

https://github.com/bharadwajy created https://github.com/llvm/llvm-project/pull/104619

Implement support for HLSL intrinsic saturate.
Implement DXIL codegen for the intrinsic saturate by lowering it to DXIL Op dx.saturate.
Implement SPIRV codegen by transforming saturate(x) to clamp(x, 0.0f, 1.0f).

Add tests for DXIL and SPIRV CodeGen.

>From 7f338f8843ed815641541664ce2eff72278aacab Mon Sep 17 00:00:00 2001
From: Bharadwaj Yadavalli <Bharadwaj.Yadavalli at microsoft.com>
Date: Thu, 1 Aug 2024 02:46:05 +0000
Subject: [PATCH] Implement support to compile HLSL intrinsic "saturate" to
 DXIL

Add SPIRV Codegen support to transform saturate(x) to clamp(x, 0.0, 1.0)
Add tests for DXIL and SPIRV CodeGen.
---
 clang/include/clang/Basic/Builtins.td         |   6 +
 clang/lib/CodeGen/CGBuiltin.cpp               |   9 +
 clang/lib/CodeGen/CGHLSLRuntime.h             |   1 +
 clang/lib/Headers/hlsl/hlsl_intrinsics.h      |  41 ++-
 clang/lib/Sema/SemaHLSL.cpp                   |   3 +-
 clang/test/CodeGenHLSL/builtins/saturate.hlsl |  54 ++++
 .../SemaHLSL/BuiltIns/saturate-errors.hlsl    |  31 ++
 llvm/include/llvm/IR/IntrinsicsDirectX.td     |   1 +
 llvm/include/llvm/IR/IntrinsicsSPIRV.td       |   3 +-
 llvm/lib/Target/DirectX/DXIL.td               |  10 +
 .../Target/DirectX/DXILIntrinsicExpansion.cpp |  32 ++
 .../Target/SPIRV/SPIRVInstructionSelector.cpp |  80 ++++-
 llvm/test/CodeGen/DirectX/saturate.ll         | 276 ++++++++++++++++++
 llvm/test/CodeGen/DirectX/saturate_errors.ll  |  14 +
 .../CodeGen/SPIRV/hlsl-intrinsics/saturate.ll |  83 ++++++
 15 files changed, 627 insertions(+), 17 deletions(-)
 create mode 100644 clang/test/CodeGenHLSL/builtins/saturate.hlsl
 create mode 100644 clang/test/SemaHLSL/BuiltIns/saturate-errors.hlsl
 create mode 100644 llvm/test/CodeGen/DirectX/saturate.ll
 create mode 100644 llvm/test/CodeGen/DirectX/saturate_errors.ll
 create mode 100644 llvm/test/CodeGen/SPIRV/hlsl-intrinsics/saturate.ll

diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index 0a874d8638df43..76e893e38b671c 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -4743,6 +4743,12 @@ def HLSLRSqrt : LangBuiltin<"HLSL_LANG"> {
   let Prototype = "void(...)";
 }
 
+def HLSLSaturate : LangBuiltin<"HLSL_LANG"> {
+  let Spellings = ["__builtin_hlsl_elementwise_saturate"];
+  let Attributes = [NoThrow, Const];
+  let Prototype = "void(...)";
+}
+
 // Builtins for XRay.
 def XRayCustomEvent : Builtin {
   let Spellings = ["__xray_customevent"];
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 1c0baeaee03632..01841774562f06 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -18666,6 +18666,15 @@ case Builtin::BI__builtin_hlsl_elementwise_isinf: {
         /*ReturnType=*/Op0->getType(), CGM.getHLSLRuntime().getRsqrtIntrinsic(),
         ArrayRef<Value *>{Op0}, nullptr, "hlsl.rsqrt");
   }
+  case Builtin::BI__builtin_hlsl_elementwise_saturate: {
+    Value *Op0 = EmitScalarExpr(E->getArg(0));
+    if (!E->getArg(0)->getType()->hasFloatingRepresentation())
+      llvm_unreachable("saturate operand must have a float representation");
+    return Builder.CreateIntrinsic(
+        /*ReturnType=*/Op0->getType(),
+        CGM.getHLSLRuntime().getSaturateIntrinsic(), ArrayRef<Value *>{Op0},
+        nullptr, "hlsl.saturate");
+  }
   case Builtin::BI__builtin_hlsl_wave_get_lane_index: {
     return EmitRuntimeCall(CGM.CreateRuntimeFunction(
         llvm::FunctionType::get(IntTy, {}, false), "__hlsl_wave_get_lane_index",
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h
index cd604bea2e763d..b1455b5779acf9 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.h
+++ b/clang/lib/CodeGen/CGHLSLRuntime.h
@@ -79,6 +79,7 @@ class CGHLSLRuntime {
   GENERATE_HLSL_INTRINSIC_FUNCTION(Lerp, lerp)
   GENERATE_HLSL_INTRINSIC_FUNCTION(Normalize, normalize)
   GENERATE_HLSL_INTRINSIC_FUNCTION(Rsqrt, rsqrt)
+  GENERATE_HLSL_INTRINSIC_FUNCTION(Saturate, saturate)
   GENERATE_HLSL_INTRINSIC_FUNCTION(ThreadId, thread_id)
 
   //===----------------------------------------------------------------------===//
diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
index 678cdc77f8a71b..6d38b668fe770e 100644
--- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h
+++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
@@ -916,7 +916,7 @@ float4 lerp(float4, float4, float4);
 /// \brief Returns the length of the specified floating-point vector.
 /// \param x [in] The vector of floats, or a scalar float.
 ///
-/// Length is based on the following formula: sqrt(x[0]^2 + x[1]^2 + �).
+/// Length is based on the following formula: sqrt(x[0]^2 + x[1]^2 + ...).
 
 _HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
 _HLSL_BUILTIN_ALIAS(__builtin_hlsl_length)
@@ -1564,6 +1564,45 @@ float3 round(float3);
 _HLSL_BUILTIN_ALIAS(__builtin_elementwise_roundeven)
 float4 round(float4);
 
+//===----------------------------------------------------------------------===//
+// saturate builtins
+//===----------------------------------------------------------------------===//
+
+/// \fn T saturate(T Val)
+/// \brief Returns the input value, \a Val, clamped to the range [0.0f, 1.0f].
+/// \param Val The input value.
+
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_saturate)
+half saturate(half);
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_saturate)
+half2 saturate(half2);
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_saturate)
+half3 saturate(half3);
+_HLSL_16BIT_AVAILABILITY(shadermodel, 6.2)
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_saturate)
+half4 saturate(half4);
+
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_saturate)
+float saturate(float);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_saturate)
+float2 saturate(float2);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_saturate)
+float3 saturate(float3);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_saturate)
+float4 saturate(float4);
+
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_saturate)
+double saturate(double);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_saturate)
+double2 saturate(double2);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_saturate)
+double3 saturate(double3);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_saturate)
+double4 saturate(double4);
+
 //===----------------------------------------------------------------------===//
 // sin builtins
 //===----------------------------------------------------------------------===//
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index e3e926465e799e..df01549cc2eeb6 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -356,7 +356,7 @@ static bool isLegalTypeForHLSLSV_DispatchThreadID(QualType T) {
   return true;
 }
 
-void SemaHLSL::handleSV_DispatchThreadIDAttr(Decl *D, const ParsedAttr &AL) {  
+void SemaHLSL::handleSV_DispatchThreadIDAttr(Decl *D, const ParsedAttr &AL) {
   auto *VD = cast<ValueDecl>(D);
   if (!isLegalTypeForHLSLSV_DispatchThreadID(VD->getType())) {
     Diag(AL.getLoc(), diag::err_hlsl_attr_invalid_type)
@@ -1045,6 +1045,7 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
       return true;
     break;
   }
+  case Builtin::BI__builtin_hlsl_elementwise_saturate:
   case Builtin::BI__builtin_hlsl_elementwise_rcp: {
     if (CheckAllArgsHaveFloatRepresentation(&SemaRef, TheCall))
       return true;
diff --git a/clang/test/CodeGenHLSL/builtins/saturate.hlsl b/clang/test/CodeGenHLSL/builtins/saturate.hlsl
new file mode 100644
index 00000000000000..970d7b7371b1eb
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/saturate.hlsl
@@ -0,0 +1,54 @@
+// RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \
+// RUN:   dxil-pc-shadermodel6.3-library %s -fnative-half-type \
+// RUN:   -emit-llvm -disable-llvm-passes -o - | FileCheck %s \
+// RUN:   --check-prefixes=CHECK,NATIVE_HALF
+// RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -x hlsl -triple \
+// RUN:   dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
+// RUN:   -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF
+
+// NATIVE_HALF: define noundef half @
+// NATIVE_HALF: call half @llvm.dx.saturate.f16(
+// NO_HALF: define noundef float @"?test_saturate_half
+// NO_HALF: call float @llvm.dx.saturate.f32(
+half test_saturate_half(half p0) { return saturate(p0); }
+// NATIVE_HALF: define noundef <2 x half> @
+// NATIVE_HALF: call <2 x half> @llvm.dx.saturate.v2f16
+// NO_HALF: define noundef <2 x float> @"?test_saturate_half2
+// NO_HALF: call <2 x float> @llvm.dx.saturate.v2f32(
+half2 test_saturate_half2(half2 p0) { return saturate(p0); }
+// NATIVE_HALF: define noundef <3 x half> @
+// NATIVE_HALF: call <3 x half> @llvm.dx.saturate.v3f16
+// NO_HALF: define noundef <3 x float> @"?test_saturate_half3
+// NO_HALF: call <3 x float> @llvm.dx.saturate.v3f32(
+half3 test_saturate_half3(half3 p0) { return saturate(p0); }
+// NATIVE_HALF: define noundef <4 x half> @
+// NATIVE_HALF: call <4 x half> @llvm.dx.saturate.v4f16
+// NO_HALF: define noundef <4 x float> @"?test_saturate_half4
+// NO_HALF: call <4 x float> @llvm.dx.saturate.v4f32(
+half4 test_saturate_half4(half4 p0) { return saturate(p0); }
+
+// CHECK: define noundef float @"?test_saturate_float
+// CHECK: call float @llvm.dx.saturate.f32(
+float test_saturate_float(float p0) { return saturate(p0); }
+// CHECK: define noundef <2 x float> @"?test_saturate_float2
+// CHECK: call <2 x float> @llvm.dx.saturate.v2f32
+float2 test_saturate_float2(float2 p0) { return saturate(p0); }
+// CHECK: define noundef <3 x float> @"?test_saturate_float3
+// CHECK: call <3 x float> @llvm.dx.saturate.v3f32
+float3 test_saturate_float3(float3 p0) { return saturate(p0); }
+// CHECK: define noundef <4 x float> @"?test_saturate_float4
+// CHECK: call <4 x float> @llvm.dx.saturate.v4f32
+float4 test_saturate_float4(float4 p0) { return saturate(p0); }
+
+// CHECK: define noundef double @
+// CHECK: call double @llvm.dx.saturate.f64(
+double test_saturate_double(double p0) { return saturate(p0); }
+// CHECK: define noundef <2 x double> @
+// CHECK: call <2 x double> @llvm.dx.saturate.v2f64
+double2 test_saturate_double2(double2 p0) { return saturate(p0); }
+// CHECK: define noundef <3 x double> @
+// CHECK: call <3 x double> @llvm.dx.saturate.v3f64
+double3 test_saturate_double3(double3 p0) { return saturate(p0); }
+// CHECK: define noundef <4 x double> @
+// CHECK: call <4 x double> @llvm.dx.saturate.v4f64
+double4 test_saturate_double4(double4 p0) { return saturate(p0); }
diff --git a/clang/test/SemaHLSL/BuiltIns/saturate-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/saturate-errors.hlsl
new file mode 100644
index 00000000000000..721b28f86f950f
--- /dev/null
+++ b/clang/test/SemaHLSL/BuiltIns/saturate-errors.hlsl
@@ -0,0 +1,31 @@
+// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify -verify-ignore-unexpected -Werror
+
+float2 test_no_arg() {
+  return saturate();
+  // expected-error@-1 {{no matching function for call to 'saturate'}}
+}
+
+float2 test_too_many_arg(float2 p0) {
+  return saturate(p0, p0, p0, p0);
+  // expected-error@-1 {{no matching function for call to 'saturate'}}
+}
+
+float2 test_saturate_vector_size_mismatch(float3 p0) {
+  return saturate(p0);
+  // expected-error@-1 {{implicit conversion truncates vector: 'float3' (aka 'vector<float, 3>') to 'vector<float, 2>'}}
+}
+
+float2 test_saturate_float2_int_splat(int p0) {
+  return saturate(p0);
+  // expected-error@-1 {{call to 'saturate' is ambiguous}}
+}
+
+float2 test_saturate_int_vect_to_float_vec_promotion(int2 p0) {
+  return saturate(p0);
+  // expected-error@-1 {{call to 'saturate' is ambiguous}}
+}
+
+float test_saturate_bool_type_promotion(bool p0) {
+  return saturate(p0);
+  // expected-error@-1 {{call to 'saturate' is ambiguous}}
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td
index c9102aa3dd972b..a0807a01ea5ab2 100644
--- a/llvm/include/llvm/IR/IntrinsicsDirectX.td
+++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -34,6 +34,7 @@ def int_dx_all : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_any_ty]>;
 def int_dx_any : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_any_ty]>;
 def int_dx_clamp : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>;
 def int_dx_uclamp : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>; 
+def int_dx_saturate : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
 
 def int_dx_dot2 : 
     Intrinsic<[LLVMVectorElementType<0>], 
diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
index 1b5e463822749e..4e130ad0c907d9 100644
--- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td
+++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
@@ -61,9 +61,10 @@ let TargetPrefix = "spv" in {
   def int_spv_all : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_any_ty]>;
   def int_spv_any : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_any_ty]>;
   def int_spv_frac : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty]>;
-  def int_spv_lerp : Intrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, LLVMMatchType<0>,LLVMMatchType<0>], 
+  def int_spv_lerp : Intrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, LLVMMatchType<0>,LLVMMatchType<0>],
     [IntrNoMem, IntrWillReturn] >;
   def int_spv_length : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>], [llvm_anyfloat_ty]>;
   def int_spv_normalize : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty]>;
   def int_spv_rsqrt : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty]>;
+  def int_spv_saturate : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
 }
diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index 67015cff78a79a..ac378db2c9b499 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -325,6 +325,16 @@ def Abs :  DXILOp<6, unary> {
   let attributes = [Attributes<DXIL1_0, [ReadNone]>];
 }
 
+def Saturate :  DXILOp<7, unary> {
+  let Doc = "Clamps a single or double precision floating point value to [0.0f...1.0f].";
+  let LLVMIntrinsic = int_dx_saturate;
+  let arguments = [overloadTy];
+  let result = overloadTy;
+  let overloads = [Overloads<DXIL1_0, [halfTy, floatTy, doubleTy]>];
+  let stages = [Stages<DXIL1_0, [all_stages]>];
+  let attributes = [Attributes<DXIL1_0, [ReadNone]>];
+}
+
 def IsInf :  DXILOp<9, isSpecialFloat> {
   let Doc = "Determines if the specified value is infinite.";
   let LLVMIntrinsic = int_dx_isinf;
diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
index e63633b8a1e1ab..4285b5e5d5a48c 100644
--- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
+++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
@@ -15,6 +15,7 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
@@ -46,6 +47,7 @@ static bool isIntrinsicExpansion(Function &F) {
   case Intrinsic::dx_normalize:
   case Intrinsic::dx_sdot:
   case Intrinsic::dx_udot:
+  case Intrinsic::dx_saturate:
     return true;
   }
   return false;
@@ -362,6 +364,34 @@ static bool expandClampIntrinsic(CallInst *Orig, Intrinsic::ID ClampIntrinsic) {
   return true;
 }
 
+/// Expand a vector dx.saturate call into one scalar dx.saturate call per lane
+/// and rebuild the vector; scalar calls are left untouched. Returns true.
+static bool expandSaturateIntrinsic(CallInst *SaturateCall) {
+  Type *RetTy = SaturateCall->getType();
+  assert(RetTy == SaturateCall->getArgOperand(0)->getType() &&
+         "Unexpected different operand and return types of call to saturate");
+  assert(SaturateCall->getIntrinsicID() == Intrinsic::dx_saturate);
+  auto *VecTy = dyn_cast<FixedVectorType>(RetTy);
+  if (!VecTy)
+    return true;
+  IRBuilder<> Builder(SaturateCall);
+  Type *EltTy = VecTy->getElementType();
+  Function *ScalarSatCallee = Intrinsic::getDeclaration(
+      SaturateCall->getModule(), Intrinsic::dx_saturate, {EltTy});
+  Value *SrcVec = SaturateCall->getArgOperand(0);
+  // Seed the result with the source and insert each saturated lane into the
+  // running result, so every lane (not just the last) ends up saturated.
+  Value *Result = SrcVec;
+  for (unsigned I = 0, E = VecTy->getNumElements(); I != E; ++I) {
+    Value *Elt = Builder.CreateExtractElement(SrcVec, I);
+    CallInst *Sat = Builder.CreateCall(ScalarSatCallee, {Elt}, "dx_saturate");
+    Result = Builder.CreateInsertElement(Result, Sat, I);
+  }
+  SaturateCall->replaceAllUsesWith(Result);
+  SaturateCall->eraseFromParent();
+  return true;
+}
+
 static bool expandIntrinsic(Function &F, CallInst *Orig) {
   switch (F.getIntrinsicID()) {
   case Intrinsic::abs:
@@ -388,6 +418,8 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) {
   case Intrinsic::dx_sdot:
   case Intrinsic::dx_udot:
     return expandIntegerDot(Orig, F.getIntrinsicID());
+  case Intrinsic::dx_saturate:
+    return expandSaturateIntrinsic(Orig);
   }
   return false;
 }
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index 7cb19279518989..ecb3cee4e781af 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -247,6 +247,9 @@ class SPIRVInstructionSelector : public InstructionSelector {
   bool selectNormalize(Register ResVReg, const SPIRVType *ResType,
                        MachineInstr &I) const;
 
+  bool selectSaturate(Register ResVReg, const SPIRVType *ResType,
+                      MachineInstr &I) const;
+
   bool selectSpvThreadId(Register ResVReg, const SPIRVType *ResType,
                          MachineInstr &I) const;
 
@@ -259,6 +262,7 @@ class SPIRVInstructionSelector : public InstructionSelector {
   Register buildZerosValF(const SPIRVType *ResType, MachineInstr &I) const;
   Register buildOnesVal(bool AllOnes, const SPIRVType *ResType,
                         MachineInstr &I) const;
+  Register buildOnesValF(const SPIRVType *ResType, MachineInstr &I) const;
 
   bool wrapIntoSpecConstantOp(MachineInstr &I,
                               SmallVector<Register> &CompositeArgs) const;
@@ -1285,6 +1289,34 @@ static unsigned getBoolCmpOpcode(unsigned PredNum) {
   }
 }
 
+static APFloat getZeroFP(const Type *LLVMFloatTy) {
+  if (!LLVMFloatTy)
+    return APFloat::getZero(APFloat::IEEEsingle());
+  switch (LLVMFloatTy->getScalarType()->getTypeID()) {
+  case Type::HalfTyID:
+    return APFloat::getZero(APFloat::IEEEhalf());
+  default:
+  case Type::FloatTyID:
+    return APFloat::getZero(APFloat::IEEEsingle());
+  case Type::DoubleTyID:
+    return APFloat::getZero(APFloat::IEEEdouble());
+  }
+}
+
+static APFloat getOneFP(const Type *LLVMFloatTy) {
+  if (!LLVMFloatTy)
+    return APFloat::getOne(APFloat::IEEEsingle());
+  switch (LLVMFloatTy->getScalarType()->getTypeID()) {
+  case Type::HalfTyID:
+    return APFloat::getOne(APFloat::IEEEhalf());
+  default:
+  case Type::FloatTyID:
+    return APFloat::getOne(APFloat::IEEEsingle());
+  case Type::DoubleTyID:
+    return APFloat::getOne(APFloat::IEEEdouble());
+  }
+}
+
 bool SPIRVInstructionSelector::selectAnyOrAll(Register ResVReg,
                                               const SPIRVType *ResType,
                                               MachineInstr &I,
@@ -1446,6 +1478,28 @@ bool SPIRVInstructionSelector::selectRsqrt(Register ResVReg,
       .constrainAllUses(TII, TRI, RBI);
 }
 
+/// Transform saturate(x) to clamp(x, 0.0f, 1.0f) as SPIRV
+/// does not have a saturate builtin.
+bool SPIRVInstructionSelector::selectSaturate(Register ResVReg,
+                                              const SPIRVType *ResType,
+                                              MachineInstr &I) const {
+  assert(I.getNumOperands() == 3);
+  assert(I.getOperand(2).isReg());
+  MachineBasicBlock &BB = *I.getParent();
+  Register VZero = buildZerosValF(ResType, I);
+  Register VOne = buildOnesValF(ResType, I);
+
+  return BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpExtInst))
+      .addDef(ResVReg)
+      .addUse(GR.getSPIRVTypeID(ResType))
+      .addImm(static_cast<uint32_t>(SPIRV::InstructionSet::GLSL_std_450))
+      .addImm(GL::FClamp)
+      .addUse(I.getOperand(2).getReg())
+      .addUse(VZero)
+      .addUse(VOne)
+      .constrainAllUses(TII, TRI, RBI);
+}
+
 bool SPIRVInstructionSelector::selectBitreverse(Register ResVReg,
                                                 const SPIRVType *ResType,
                                                 MachineInstr &I) const {
@@ -1724,20 +1778,6 @@ Register SPIRVInstructionSelector::buildZerosVal(const SPIRVType *ResType,
   return GR.getOrCreateConstInt(0, I, ResType, TII, ZeroAsNull);
 }
 
-static APFloat getZeroFP(const Type *LLVMFloatTy) {
-  if (!LLVMFloatTy)
-    return APFloat::getZero(APFloat::IEEEsingle());
-  switch (LLVMFloatTy->getScalarType()->getTypeID()) {
-  case Type::HalfTyID:
-    return APFloat::getZero(APFloat::IEEEhalf());
-  default:
-  case Type::FloatTyID:
-    return APFloat::getZero(APFloat::IEEEsingle());
-  case Type::DoubleTyID:
-    return APFloat::getZero(APFloat::IEEEdouble());
-  }
-}
-
 Register SPIRVInstructionSelector::buildZerosValF(const SPIRVType *ResType,
                                                   MachineInstr &I) const {
   // OpenCL uses nulls for Zero. In HLSL we don't use null constants.
@@ -1748,6 +1788,16 @@ Register SPIRVInstructionSelector::buildZerosValF(const SPIRVType *ResType,
   return GR.getOrCreateConstFP(VZero, I, ResType, TII, ZeroAsNull);
 }
 
+Register SPIRVInstructionSelector::buildOnesValF(const SPIRVType *ResType,
+                                                 MachineInstr &I) const {
+  // OpenCL uses nulls for Zero. In HLSL we don't use null constants.
+  bool ZeroAsNull = STI.isOpenCLEnv();
+  APFloat VOne = getOneFP(GR.getTypeForSPIRVType(ResType));
+  if (ResType->getOpcode() == SPIRV::OpTypeVector)
+    return GR.getOrCreateConstVector(VOne, I, ResType, TII, ZeroAsNull);
+  return GR.getOrCreateConstFP(VOne, I, ResType, TII, ZeroAsNull);
+}
+
 Register SPIRVInstructionSelector::buildOnesVal(bool AllOnes,
                                                 const SPIRVType *ResType,
                                                 MachineInstr &I) const {
@@ -2181,6 +2231,8 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg,
       Size = 0;
     BuildMI(BB, I, I.getDebugLoc(), TII.get(Op)).addUse(PtrReg).addImm(Size);
   } break;
+  case Intrinsic::spv_saturate:
+    return selectSaturate(ResVReg, ResType, I);
   default: {
     std::string DiagMsg;
     raw_string_ostream OS(DiagMsg);
diff --git a/llvm/test/CodeGen/DirectX/saturate.ll b/llvm/test/CodeGen/DirectX/saturate.ll
new file mode 100644
index 00000000000000..0c96249ccea435
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/saturate.ll
@@ -0,0 +1,276 @@
+; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+; Make sure the intrinsic dx.saturate is lowered to the appropriate DXIL op for half/float/double data types.
+
+target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64"
+target triple = "dxilv1.6-unknown-shadermodel6.6-library"
+
+; CHECK-LABEL: test_saturate_half
+define noundef half @test_saturate_half(half noundef %p0) #0 {
+entry:
+  %p0.addr = alloca half, align 2
+  store half %p0, ptr %p0.addr, align 2, !tbaa !4
+  %0 = load half, ptr %p0.addr, align 2, !tbaa !4
+  ; CHECK: %1 = call half @dx.op.unary.f16(i32 7, half %0)
+  %hlsl.saturate = call half @llvm.dx.saturate.f16(half %0)
+  ; CHECK: ret half %1
+  ret half %hlsl.saturate
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn
+declare half @llvm.dx.saturate.f16(half) #1
+
+; CHECK-LABEL: test_saturate_half2
+define noundef <2 x half> @test_saturate_half2(<2 x half> noundef %p0) #0 {
+entry:
+  %p0.addr = alloca <2 x half>, align 4
+  store <2 x half> %p0, ptr %p0.addr, align 4, !tbaa !8
+  %0 = load <2 x half>, ptr %p0.addr, align 4, !tbaa !8
+  ; CHECK: %1 = extractelement <2 x half> %0, i64 0
+  ; CHECK-NEXT: %2 = call half @dx.op.unary.f16(i32 7, half %1)
+  ; CHECK-NEXT: %3 = insertelement <2 x half> %0, half %2, i64 0
+  ; CHECK-NEXT: %4 = extractelement <2 x half> %0, i64 1
+  ; CHECK-NEXT: %5 = call half @dx.op.unary.f16(i32 7, half %4)
+  ; CHECK-NEXT: %6 = insertelement <2 x half> %0, half %5, i64 1
+  %hlsl.saturate = call <2 x half> @llvm.dx.saturate.v2f16(<2 x half> %0)
+  ; CHECK: ret <2 x half> %6
+  ret <2 x half> %hlsl.saturate
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn
+declare <2 x half> @llvm.dx.saturate.v2f16(<2 x half>) #1
+
+; CHECK-LABEL: test_saturate_half3
+define noundef <3 x half> @test_saturate_half3(<3 x half> noundef %p0) #0 {
+entry:
+  %p0.addr = alloca <3 x half>, align 8
+  store <3 x half> %p0, ptr %p0.addr, align 8, !tbaa !8
+  %0 = load <3 x half>, ptr %p0.addr, align 8, !tbaa !8
+  ; CHECK: %1 = extractelement <3 x half> %0, i64 0
+  ; CHECK-NEXT: %2 = call half @dx.op.unary.f16(i32 7, half %1)
+  ; CHECK-NEXT: %3 = insertelement <3 x half> %0, half %2, i64 0
+  ; CHECK-NEXT: %4 = extractelement <3 x half> %0, i64 1
+  ; CHECK-NEXT: %5 = call half @dx.op.unary.f16(i32 7, half %4)
+  ; CHECK-NEXT: %6 = insertelement <3 x half> %0, half %5, i64 1
+  ; CHECK-NEXT: %7 = extractelement <3 x half> %0, i64 2
+  ; CHECK-NEXT: %8 = call half @dx.op.unary.f16(i32 7, half %7)
+  ; CHECK-NEXT: %9 = insertelement <3 x half> %0, half %8, i64 2
+  %hlsl.saturate = call <3 x half> @llvm.dx.saturate.v3f16(<3 x half> %0)
+  ; CHECK: ret <3 x half> %9
+  ret <3 x half> %hlsl.saturate
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn
+declare <3 x half> @llvm.dx.saturate.v3f16(<3 x half>) #1
+
+; CHECK-LABEL: test_saturate_half4
+define noundef <4 x half> @test_saturate_half4(<4 x half> noundef %p0) #0 {
+entry:
+  %p0.addr = alloca <4 x half>, align 8
+  store <4 x half> %p0, ptr %p0.addr, align 8, !tbaa !8
+  %0 = load <4 x half>, ptr %p0.addr, align 8, !tbaa !8
+  ; CHECK: %1 = extractelement <4 x half> %0, i64 0
+  ; CHECK-NEXT: %2 = call half @dx.op.unary.f16(i32 7, half %1)
+  ; CHECK-NEXT: %3 = insertelement <4 x half> %0, half %2, i64 0
+  ; CHECK-NEXT: %4 = extractelement <4 x half> %0, i64 1
+  ; CHECK-NEXT: %5 = call half @dx.op.unary.f16(i32 7, half %4)
+  ; CHECK-NEXT: %6 = insertelement <4 x half> %0, half %5, i64 1
+  ; CHECK-NEXT: %7 = extractelement <4 x half> %0, i64 2
+  ; CHECK-NEXT: %8 = call half @dx.op.unary.f16(i32 7, half %7)
+  ; CHECK-NEXT: %9 = insertelement <4 x half> %0, half %8, i64 2
+  ; CHECK-NEXT: %10 = extractelement <4 x half> %0, i64 3
+  ; CHECK-NEXT: %11 = call half @dx.op.unary.f16(i32 7, half %10)
+  ; CHECK-NEXT: %12 = insertelement <4 x half> %0, half %11, i64 3
+  %hlsl.saturate = call <4 x half> @llvm.dx.saturate.v4f16(<4 x half> %0)
+  ; CHECK: ret <4 x half> %12
+  ret <4 x half> %hlsl.saturate
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn
+declare <4 x half> @llvm.dx.saturate.v4f16(<4 x half>) #1
+
+; CHECK-LABEL: test_saturate_float
+define noundef float @test_saturate_float(float noundef %p0) #0 {
+entry:
+  %p0.addr = alloca float, align 4
+  store float %p0, ptr %p0.addr, align 4, !tbaa !9
+  %0 = load float, ptr %p0.addr, align 4, !tbaa !9
+  ; CHECK: %1 = call float @dx.op.unary.f32(i32 7, float %0)
+  %hlsl.saturate = call float @llvm.dx.saturate.f32(float %0)
+  ; CHECK: ret float %1
+  ret float %hlsl.saturate
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn
+declare float @llvm.dx.saturate.f32(float) #1
+
+; CHECK-LABEL: test_saturate_float2
+define noundef <2 x float> @test_saturate_float2(<2 x float> noundef %p0) #0 {
+entry:
+  %p0.addr = alloca <2 x float>, align 8
+  store <2 x float> %p0, ptr %p0.addr, align 8, !tbaa !8
+  %0 = load <2 x float>, ptr %p0.addr, align 8, !tbaa !8
+  ; CHECK: %1 = extractelement <2 x float> %0, i64 0
+  ; CHECK-NEXT: %2 = call float @dx.op.unary.f32(i32 7, float %1)
+  ; CHECK-NEXT: %3 = insertelement <2 x float> %0, float %2, i64 0
+  ; CHECK-NEXT: %4 = extractelement <2 x float> %0, i64 1
+  ; CHECK-NEXT: %5 = call float @dx.op.unary.f32(i32 7, float %4)
+  ; CHECK-NEXT: %6 = insertelement <2 x float> %0, float %5, i64 1
+  %hlsl.saturate = call <2 x float> @llvm.dx.saturate.v2f32(<2 x float> %0)
+  ; CHECK: ret <2 x float> %6
+  ret <2 x float> %hlsl.saturate
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn
+declare <2 x float> @llvm.dx.saturate.v2f32(<2 x float>) #1
+
+; CHECK-LABEL: test_saturate_float3
+define noundef <3 x float> @test_saturate_float3(<3 x float> noundef %p0) #0 {
+entry:
+  %p0.addr = alloca <3 x float>, align 16
+  store <3 x float> %p0, ptr %p0.addr, align 16, !tbaa !8
+  %0 = load <3 x float>, ptr %p0.addr, align 16, !tbaa !8
+  ; CHECK: %1 = extractelement <3 x float> %0, i64 0
+  ; CHECK-NEXT: %2 = call float @dx.op.unary.f32(i32 7, float %1)
+  ; CHECK-NEXT: %3 = insertelement <3 x float> %0, float %2, i64 0
+  ; CHECK-NEXT: %4 = extractelement <3 x float> %0, i64 1
+  ; CHECK-NEXT: %5 = call float @dx.op.unary.f32(i32 7, float %4)
+  ; CHECK-NEXT: %6 = insertelement <3 x float> %0, float %5, i64 1
+  ; CHECK-NEXT: %7 = extractelement <3 x float> %0, i64 2
+  ; CHECK-NEXT: %8 = call float @dx.op.unary.f32(i32 7, float %7)
+  ; CHECK-NEXT: %9 = insertelement <3 x float> %0, float %8, i64 2
+  %hlsl.saturate = call <3 x float> @llvm.dx.saturate.v3f32(<3 x float> %0)
+  ; CHECK: ret <3 x float> %9
+  ret <3 x float> %hlsl.saturate
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn
+declare <3 x float> @llvm.dx.saturate.v3f32(<3 x float>) #1
+
+; CHECK-LABEL: test_saturate_float4
+define noundef <4 x float> @test_saturate_float4(<4 x float> noundef %p0) #0 {
+entry:
+  %p0.addr = alloca <4 x float>, align 16
+  store <4 x float> %p0, ptr %p0.addr, align 16, !tbaa !8
+  %0 = load <4 x float>, ptr %p0.addr, align 16, !tbaa !8
+  ; CHECK: %1 = extractelement <4 x float> %0, i64 0
+  ; CHECK-NEXT: %2 = call float @dx.op.unary.f32(i32 7, float %1)
+  ; CHECK-NEXT: %3 = insertelement <4 x float> %0, float %2, i64 0
+  ; CHECK-NEXT: %4 = extractelement <4 x float> %0, i64 1
+  ; CHECK-NEXT: %5 = call float @dx.op.unary.f32(i32 7, float %4)
+  ; CHECK-NEXT: %6 = insertelement <4 x float> %0, float %5, i64 1
+  ; CHECK-NEXT: %7 = extractelement <4 x float> %0, i64 2
+  ; CHECK-NEXT: %8 = call float @dx.op.unary.f32(i32 7, float %7)
+  ; CHECK-NEXT: %9 = insertelement <4 x float> %0, float %8, i64 2
+  ; CHECK-NEXT: %10 = extractelement <4 x float> %0, i64 3
+  ; CHECK-NEXT: %11 = call float @dx.op.unary.f32(i32 7, float %10)
+  ; CHECK-NEXT: %12 = insertelement <4 x float> %0, float %11, i64 3
+  %hlsl.saturate = call <4 x float> @llvm.dx.saturate.v4f32(<4 x float> %0)
+  ; CHECK: ret <4 x float> %12
+  ret <4 x float> %hlsl.saturate
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn
+declare <4 x float> @llvm.dx.saturate.v4f32(<4 x float>) #1
+
+; CHECK-LABEL: test_saturate_double
+define noundef double @test_saturate_double(double noundef %p0) #0 { ; scalar double case: expected to become a single dx.op.unary.f64 call (first operand i32 7, presumably the saturate opcode)
+entry:
+  %p0.addr = alloca double, align 8 ; stack slot for the argument
+  store double %p0, ptr %p0.addr, align 8, !tbaa !11 ; spill the argument
+  %0 = load double, ptr %p0.addr, align 8, !tbaa !11 ; reload it; this is the %0 named in the expected output
+  ; CHECK: %1 = call double @dx.op.unary.f64(i32 7, double %0)
+  %hlsl.saturate = call double @llvm.dx.saturate.f64(double %0) ; intrinsic call under test
+  ; CHECK: ret double %1
+  ret double %hlsl.saturate ; the expected output returns the dx.op result %1 in its place
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn
+declare double @llvm.dx.saturate.f64(double) #1
+
+; CHECK-LABEL: test_saturate_double2
+define noundef <2 x double> @test_saturate_double2(<2 x double> noundef %p0) #0 { ; <2 x double> case: the directives below expect one scalar dx.op.unary.f64 call (first operand i32 7) per lane
+entry:
+  %p0.addr = alloca <2 x double>, align 16 ; stack slot for the incoming vector
+  store <2 x double> %p0, ptr %p0.addr, align 16, !tbaa !8 ; spill the argument
+  %0 = load <2 x double>, ptr %p0.addr, align 16, !tbaa !8 ; reload it; this is the %0 named in the expected output
+  ; CHECK: %1 = extractelement <2 x double> %0, i64 0
+  ; CHECK-NEXT: %2 = call double @dx.op.unary.f64(i32 7, double %1)
+  ; CHECK-NEXT: %3 = insertelement <2 x double> %0, double %2, i64 0
+  ; CHECK-NEXT: %4 = extractelement <2 x double> %0, i64 1
+  ; CHECK-NEXT: %5 = call double @dx.op.unary.f64(i32 7, double %4)
+  ; CHECK-NEXT: %6 = insertelement <2 x double> %0, double %5, i64 1
+  %hlsl.saturate = call <2 x double> @llvm.dx.saturate.v2f64(<2 x double> %0) ; NOTE(review): both expected insertelements use %0 as their base, so the returned %6 would carry only the saturated lane 1 -- confirm the expansion is meant to chain the inserts
+  ; CHECK: ret <2 x double> %6
+  ret <2 x double> %hlsl.saturate
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn
+declare <2 x double> @llvm.dx.saturate.v2f64(<2 x double>) #1
+
+; CHECK-LABEL: test_saturate_double3
+define noundef <3 x double> @test_saturate_double3(<3 x double> noundef %p0) #0 { ; <3 x double> case: the directives below expect one scalar dx.op.unary.f64 call (first operand i32 7) per lane
+entry:
+  %p0.addr = alloca <3 x double>, align 32 ; stack slot for the incoming vector
+  store <3 x double> %p0, ptr %p0.addr, align 32, !tbaa !8 ; spill the argument
+  %0 = load <3 x double>, ptr %p0.addr, align 32, !tbaa !8 ; reload it; this is the %0 named in the expected output
+  ; CHECK: %1 = extractelement <3 x double> %0, i64 0
+  ; CHECK-NEXT: %2 = call double @dx.op.unary.f64(i32 7, double %1)
+  ; CHECK-NEXT: %3 = insertelement <3 x double> %0, double %2, i64 0
+  ; CHECK-NEXT: %4 = extractelement <3 x double> %0, i64 1
+  ; CHECK-NEXT: %5 = call double @dx.op.unary.f64(i32 7, double %4)
+  ; CHECK-NEXT: %6 = insertelement <3 x double> %0, double %5, i64 1
+  ; CHECK-NEXT: %7 = extractelement <3 x double> %0, i64 2
+  ; CHECK-NEXT: %8 = call double @dx.op.unary.f64(i32 7, double %7)
+  ; CHECK-NEXT: %9 = insertelement <3 x double> %0, double %8, i64 2
+  %hlsl.saturate = call <3 x double> @llvm.dx.saturate.v3f64(<3 x double> %0) ; NOTE(review): every expected insertelement uses %0 as its base, so the returned %9 would carry only the saturated lane 2 -- confirm the expansion is meant to chain the inserts
+  ; CHECK: ret <3 x double> %9
+  ret <3 x double> %hlsl.saturate
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn
+declare <3 x double> @llvm.dx.saturate.v3f64(<3 x double>) #1
+
+; CHECK-LABEL: test_saturate_double4
+define noundef <4 x double> @test_saturate_double4(<4 x double> noundef %p0) #0 { ; <4 x double> case: the directives below expect one scalar dx.op.unary.f64 call (first operand i32 7) per lane
+entry:
+  %p0.addr = alloca <4 x double>, align 32 ; stack slot for the incoming vector
+  store <4 x double> %p0, ptr %p0.addr, align 32, !tbaa !8 ; spill the argument
+  %0 = load <4 x double>, ptr %p0.addr, align 32, !tbaa !8 ; reload it; this is the %0 named in the expected output
+  ; CHECK: %1 = extractelement <4 x double> %0, i64 0
+  ; CHECK-NEXT: %2 = call double @dx.op.unary.f64(i32 7, double %1)
+  ; CHECK-NEXT: %3 = insertelement <4 x double> %0, double %2, i64 0
+  ; CHECK-NEXT: %4 = extractelement <4 x double> %0, i64 1
+  ; CHECK-NEXT: %5 = call double @dx.op.unary.f64(i32 7, double %4)
+  ; CHECK-NEXT: %6 = insertelement <4 x double> %0, double %5, i64 1
+  ; CHECK-NEXT: %7 = extractelement <4 x double> %0, i64 2
+  ; CHECK-NEXT: %8 = call double @dx.op.unary.f64(i32 7, double %7)
+  ; CHECK-NEXT: %9 = insertelement <4 x double> %0, double %8, i64 2
+  ; CHECK-NEXT: %10 = extractelement <4 x double> %0, i64 3
+  ; CHECK-NEXT: %11 = call double @dx.op.unary.f64(i32 7, double %10)
+  ; CHECK-NEXT: %12 = insertelement <4 x double> %0, double %11, i64 3
+  %hlsl.saturate = call <4 x double> @llvm.dx.saturate.v4f64(<4 x double> %0) ; NOTE(review): every expected insertelement uses %0 as its base, so the returned %12 would carry only the saturated lane 3 -- confirm the expansion is meant to chain the inserts
+  ; CHECK: ret <4 x double> %12
+  ret <4 x double> %hlsl.saturate
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn
+declare <4 x double> @llvm.dx.saturate.v4f64(<4 x double>) #1
+
+attributes #0 = { convergent nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } ; attribute group referenced by the test function definitions
+attributes #1 = { nocallback nofree nosync nounwind willreturn } ; attribute group referenced by the llvm.dx.saturate declarations
+
+!llvm.module.flags = !{!0, !1} ; module flags are the nodes !0 and !1
+!dx.valver = !{!2} ; presumably the DXIL validator version (1, 7) -- confirm
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 7, !"frame-pointer", i32 2}
+!2 = !{i32 1, i32 7}
+!4 = !{!5, !5, i64 0} ; TBAA access tag built on the "half" type node
+!5 = !{!"half", !6, i64 0}
+!6 = !{!"omnipotent char", !7, i64 0}
+!7 = !{!"Simple C++ TBAA"} ; TBAA root node
+!8 = !{!6, !6, i64 0} ; TBAA access tag built on "omnipotent char"; the vector loads/stores reference it
+!9 = !{!10, !10, i64 0} ; TBAA access tag built on the "float" type node
+!10 = !{!"float", !6, i64 0}
+!11 = !{!12, !12, i64 0} ; TBAA access tag built on the "double" type node; the scalar double load/store reference it
+!12 = !{!"double", !6, i64 0}
diff --git a/llvm/test/CodeGen/DirectX/saturate_errors.ll b/llvm/test/CodeGen/DirectX/saturate_errors.ll
new file mode 100644
index 00000000000000..940843f5e58475
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/saturate_errors.ll
@@ -0,0 +1,14 @@
+; RUN: not opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s 2>&1 | FileCheck %s
+; Make sure lowering of the intrinsic dx.saturate reports an error for an unsupported (non half/float/double) data type.
+
+target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64"
+target triple = "dxilv1.6-unknown-shadermodel6.6-library"
+
+; DXIL operation saturate does not support i32 overload
+; CHECK: invalid intrinsic signature
+
+define noundef i32 @test_saturate_i32(i32 noundef %p0) { ; negative test: i32 is not a supported overload of dx.saturate, so the tool run is expected to fail; the dangling reference to the undefined attribute group #0 was dropped
+entry:
+  %hlsl.saturate = call i32 @llvm.dx.saturate.i32(i32 %p0) ; offending call with the unsupported i32 overload
+  ret i32 %hlsl.saturate
+}
diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/saturate.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/saturate.ll
new file mode 100644
index 00000000000000..0b05b615c4ad17
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/saturate.ll
@@ -0,0 +1,83 @@
+; RUN: llc -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %}
+
+; Make sure SPIRV operation function calls for saturate are lowered correctly.
+; The vec4 constant patterns list all four constituents so they match the whole instruction; 15360 is 0x3C00, the half-precision bit pattern of 1.0.
+; CHECK-DAG: %[[#op_ext_glsl:]] = OpExtInstImport "GLSL.std.450"
+; CHECK-DAG: %[[#float_16:]] = OpTypeFloat 16
+; CHECK-DAG: %[[#vec4_float_16:]] = OpTypeVector %[[#float_16]] 4
+; CHECK-DAG: %[[#float_32:]] = OpTypeFloat 32
+; CHECK-DAG: %[[#vec4_float_32:]] = OpTypeVector %[[#float_32]] 4
+; CHECK-DAG: %[[#float_64:]] = OpTypeFloat 64
+; CHECK-DAG: %[[#vec4_float_64:]] = OpTypeVector %[[#float_64]] 4
+; CHECK-DAG: %[[#zero_float_16:]] = OpConstant %[[#float_16]] 0
+; CHECK-DAG: %[[#vec4_zero_float_16:]] = OpConstantComposite %[[#vec4_float_16]] %[[#zero_float_16]] %[[#zero_float_16]] %[[#zero_float_16]] %[[#zero_float_16]]
+; CHECK-DAG: %[[#one_float_16:]] = OpConstant %[[#float_16]] 15360
+; CHECK-DAG: %[[#vec4_one_float_16:]] = OpConstantComposite %[[#vec4_float_16]] %[[#one_float_16]] %[[#one_float_16]] %[[#one_float_16]] %[[#one_float_16]]
+; CHECK-DAG: %[[#zero_float_32:]] = OpConstant %[[#float_32]] 0
+; CHECK-DAG: %[[#vec4_zero_float_32:]] = OpConstantComposite %[[#vec4_float_32]] %[[#zero_float_32]] %[[#zero_float_32]] %[[#zero_float_32]] %[[#zero_float_32]]
+; CHECK-DAG: %[[#one_float_32:]] = OpConstant %[[#float_32]] 1
+; CHECK-DAG: %[[#vec4_one_float_32:]] = OpConstantComposite %[[#vec4_float_32]] %[[#one_float_32]] %[[#one_float_32]] %[[#one_float_32]] %[[#one_float_32]]
+
+; CHECK-DAG: %[[#zero_float_64:]] = OpConstant %[[#float_64]] 0
+; CHECK-DAG: %[[#vec4_zero_float_64:]] = OpConstantComposite %[[#vec4_float_64]] %[[#zero_float_64]] %[[#zero_float_64]] %[[#zero_float_64]] %[[#zero_float_64]]
+; CHECK-DAG: %[[#one_float_64:]] = OpConstant %[[#float_64]] 1
+; CHECK-DAG: %[[#vec4_one_float_64:]] = OpConstantComposite %[[#vec4_float_64]] %[[#one_float_64]] %[[#one_float_64]] %[[#one_float_64]] %[[#one_float_64]]
+
+define noundef half @saturate_half(half noundef %a) { ; scalar half case
+entry:
+  ; CHECK: %[[#]] = OpFunction %[[#float_16]] None %[[#]]
+  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#float_16]]
+  ; CHECK: %[[#]] = OpExtInst %[[#float_16]] %[[#op_ext_glsl]] FClamp %[[#arg0]] %[[#zero_float_16]] %[[#one_float_16]]
+  %hlsl.saturate = call half @llvm.spv.saturate.f16(half %a) ; expected to be selected as a GLSL.std.450 FClamp of %a against the half 0 and 1 constants; NOTE(review): no declare for this scalar overload appears in this file -- presumably declared implicitly by the parser, confirm
+  ret half %hlsl.saturate
+}
+
+define noundef float @saturate_float(float noundef %a) { ; scalar float case
+entry:
+  ; CHECK: %[[#]] = OpFunction %[[#float_32]] None %[[#]]
+  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#float_32]]
+  ; CHECK: %[[#]] = OpExtInst %[[#float_32]] %[[#op_ext_glsl]] FClamp %[[#arg0]] %[[#zero_float_32]] %[[#one_float_32]]
+  %hlsl.saturate = call float @llvm.spv.saturate.f32(float %a) ; expected to be selected as a GLSL.std.450 FClamp of %a against the float 0 and 1 constants; NOTE(review): no declare for this scalar overload appears in this file -- presumably declared implicitly by the parser, confirm
+  ret float %hlsl.saturate
+}
+
+define noundef double @saturate_double(double noundef %a) { ; scalar double case
+entry:
+  ; CHECK: %[[#]] = OpFunction %[[#float_64]] None %[[#]]
+  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#float_64]]
+  ; CHECK: %[[#]] = OpExtInst %[[#float_64]] %[[#op_ext_glsl]] FClamp %[[#arg0]] %[[#zero_float_64]] %[[#one_float_64]]
+  %hlsl.saturate = call double @llvm.spv.saturate.f64(double %a) ; expected to be selected as a GLSL.std.450 FClamp of %a against the double 0 and 1 constants; NOTE(review): no declare for this scalar overload appears in this file -- presumably declared implicitly by the parser, confirm
+  ret double %hlsl.saturate
+}
+
+define noundef <4 x half> @saturate_half4(<4 x half> noundef %a) { ; <4 x half> vector case
+entry:
+  ; CHECK: %[[#]] = OpFunction %[[#vec4_float_16]] None %[[#]]
+  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec4_float_16]]
+  ; CHECK: %[[#]] = OpExtInst %[[#vec4_float_16]] %[[#op_ext_glsl]] FClamp %[[#arg0]] %[[#vec4_zero_float_16]] %[[#vec4_one_float_16]]
+  %hlsl.saturate = call <4 x half> @llvm.spv.saturate.v4f16(<4 x half> %a) ; expected to be selected as one vector-wide GLSL.std.450 FClamp of %a against the vec4 half 0 and 1 constants
+  ret <4 x half> %hlsl.saturate
+}
+
+define noundef <4 x float> @saturate_float4(<4 x float> noundef %a) { ; <4 x float> vector case
+entry:
+  ; CHECK: %[[#]] = OpFunction %[[#vec4_float_32]] None %[[#]]
+  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec4_float_32]]
+  ; CHECK: %[[#]] = OpExtInst %[[#vec4_float_32]] %[[#op_ext_glsl]] FClamp %[[#arg0]] %[[#vec4_zero_float_32]] %[[#vec4_one_float_32]]
+  %hlsl.saturate = call <4 x float> @llvm.spv.saturate.v4f32(<4 x float> %a) ; expected to be selected as one vector-wide GLSL.std.450 FClamp of %a against the vec4 float 0 and 1 constants
+  ret <4 x float> %hlsl.saturate
+}
+
+define noundef <4 x double> @saturate_double4(<4 x double> noundef %a) { ; <4 x double> vector case
+entry:
+  ; CHECK: %[[#]] = OpFunction %[[#vec4_float_64]] None %[[#]]
+  ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec4_float_64]]
+  ; CHECK: %[[#]] = OpExtInst %[[#vec4_float_64]] %[[#op_ext_glsl]] FClamp %[[#arg0]] %[[#vec4_zero_float_64]] %[[#vec4_one_float_64]]
+  %hlsl.saturate = call <4 x double> @llvm.spv.saturate.v4f64(<4 x double> %a) ; expected to be selected as one vector-wide GLSL.std.450 FClamp of %a against the vec4 double 0 and 1 constants
+  ret <4 x double> %hlsl.saturate
+}
+
+declare <4 x half> @llvm.spv.saturate.v4f16(<4 x half>)
+declare <4 x float> @llvm.spv.saturate.v4f32(<4 x float>)
+declare <4 x double> @llvm.spv.saturate.v4f64(<4 x double>)