[llvm] d5f4055 - [HLSL] Implement f32tof16() intrinsic (#172469)

Mon Jan 26 07:06:53 PST 2026

Author: Tim Corringham
Date: 2026-01-26T15:06:48Z
New Revision: d5f405558d8f7b6018d6f3a9f75b5ffa6ae82feb

URL: https://github.com/llvm/llvm-project/commit/d5f405558d8f7b6018d6f3a9f75b5ffa6ae82feb
DIFF: https://github.com/llvm/llvm-project/commit/d5f405558d8f7b6018d6f3a9f75b5ffa6ae82feb.diff

LOG: [HLSL] Implement f32tof16() intrinsic (#172469)

Implement the f32tof16() intrinsic, DXIL and SPIRV codegen, and related
tests.

Fixes #99113

---------

Co-authored-by: Tim Corringham <tcorring at amd.com>

Added: 
    clang/test/CodeGenHLSL/builtins/f32tof16-builtin.hlsl
    clang/test/CodeGenHLSL/builtins/f32tof16.hlsl
    clang/test/SemaHLSL/BuiltIns/f32tof16-errors.hlsl
    llvm/test/CodeGen/DirectX/f32tof16.ll
    llvm/test/CodeGen/SPIRV/opencl/packhalf2x16-error.ll
    llvm/test/CodeGen/SPIRV/packhalf2x16.ll

Modified: 
    clang/include/clang/Basic/Builtins.td
    clang/lib/CodeGen/CGHLSLBuiltins.cpp
    clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h
    clang/lib/Sema/SemaHLSL.cpp
    llvm/include/llvm/IR/IntrinsicsDirectX.td
    llvm/include/llvm/IR/IntrinsicsSPIRV.td
    llvm/lib/Target/DirectX/DXIL.td
    llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp
    llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index bc8f1474493b0..dcf07d659cb15 100644

--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -5314,6 +5314,12 @@ def HLSLF16ToF32 : LangBuiltin<"HLSL_LANG"> {
   let Prototype = "void(...)";
 }
 
+def HLSLF32ToF16 : LangBuiltin<"HLSL_LANG"> {
+  let Spellings = ["__builtin_hlsl_elementwise_f32tof16"];
+  let Attributes = [NoThrow, Const, CustomTypeChecking];
+  let Prototype = "void(...)";
+}
+
 def HLSLDdxCoarse : LangBuiltin<"HLSL_LANG"> {
   let Spellings = ["__builtin_hlsl_elementwise_ddx_coarse"];
   let Attributes = [NoThrow, Const, CustomTypeChecking];

diff  --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
index 36691c7b72efe..cfe5be354a494 100644
--- a/clang/lib/CodeGen/CGHLSLBuiltins.cpp
+++ b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
@@ -220,33 +220,86 @@ static Value *handleElementwiseF16ToF32(CodeGenFunction &CGF,
   if (CGF.CGM.getTriple().isSPIRV()) {
     // We use the SPIRV UnpackHalf2x16 operation to avoid the need for the
     // Int16 and Float16 capabilities
-    auto UnpackType =
+    auto *UnpackType =
         llvm::VectorType::get(CGF.FloatTy, ElementCount::getFixed(2));
+
     if (NumElements == 0) {
       // a scalar input - simply extract the first element of the unpacked
       // vector
       Value *Unpack = CGF.Builder.CreateIntrinsic(
           UnpackType, Intrinsic::spv_unpackhalf2x16, ArrayRef<Value *>{Op0});
       return CGF.Builder.CreateExtractElement(Unpack, (uint64_t)0);
-    } else {
-      // a vector input - build a congruent output vector by iterating through
-      // the input vector calling unpackhalf2x16 for each element
-      Value *Result = PoisonValue::get(ResType);
-      for (uint64_t i = 0; i < NumElements; i++) {
-        Value *InVal = CGF.Builder.CreateExtractElement(Op0, i);
-        Value *Unpack = CGF.Builder.CreateIntrinsic(
-            UnpackType, Intrinsic::spv_unpackhalf2x16,
-            ArrayRef<Value *>{InVal});
-        Value *Res = CGF.Builder.CreateExtractElement(Unpack, (uint64_t)0);
-        Result = CGF.Builder.CreateInsertElement(Result, Res, i);
-      }
-      return Result;
     }
+
+    // a vector input - build a congruent output vector by iterating through
+    // the input vector calling unpackhalf2x16 for each element
+    Value *Result = PoisonValue::get(ResType);
+    for (uint64_t I = 0; I < NumElements; I++) {
+      Value *InVal = CGF.Builder.CreateExtractElement(Op0, I);
+      Value *Unpack = CGF.Builder.CreateIntrinsic(
+          UnpackType, Intrinsic::spv_unpackhalf2x16, ArrayRef<Value *>{InVal});
+      Value *Res = CGF.Builder.CreateExtractElement(Unpack, (uint64_t)0);
+      Result = CGF.Builder.CreateInsertElement(Result, Res, I);
+    }
+    return Result;
   }
 
   llvm_unreachable("Intrinsic F16ToF32 not supported by target architecture");
 }
 
+static Value *handleElementwiseF32ToF16(CodeGenFunction &CGF,
+                                        const CallExpr *E) {
+  Value *Op0 = CGF.EmitScalarExpr(E->getArg(0));
+  QualType Op0Ty = E->getArg(0)->getType();
+  llvm::Type *ResType = CGF.IntTy;
+  uint64_t NumElements = 0;
+  if (Op0->getType()->isVectorTy()) {
+    NumElements =
+        E->getArg(0)->getType()->castAs<clang::VectorType>()->getNumElements();
+    ResType =
+        llvm::VectorType::get(ResType, ElementCount::getFixed(NumElements));
+  }
+  if (!Op0Ty->hasFloatingRepresentation())
+    llvm_unreachable("f32tof16 operand must have a float representation");
+
+  if (CGF.CGM.getTriple().isDXIL())
+    return CGF.Builder.CreateIntrinsic(ResType, Intrinsic::dx_legacyf32tof16,
+                                       ArrayRef<Value *>{Op0}, nullptr,
+                                       "hlsl.f32tof16");
+
+  if (CGF.CGM.getTriple().isSPIRV()) {
+    // We use the SPIRV PackHalf2x16 operation to avoid the need for the
+    // Int16 and Float16 capabilities
+    auto *PackType =
+        llvm::VectorType::get(CGF.FloatTy, ElementCount::getFixed(2));
+
+    if (NumElements == 0) {
+      // a scalar input - simply insert the scalar in the first element
+      // of the 2 element float vector
+      Value *Float2 = Constant::getNullValue(PackType);
+      Float2 = CGF.Builder.CreateInsertElement(Float2, Op0, (uint64_t)0);
+      Value *Result = CGF.Builder.CreateIntrinsic(
+          ResType, Intrinsic::spv_packhalf2x16, ArrayRef<Value *>{Float2});
+      return Result;
+    }
+
+    // a vector input - build a congruent output vector by iterating through
+    // the input vector calling packhalf2x16 for each element
+    Value *Result = PoisonValue::get(ResType);
+    for (uint64_t I = 0; I < NumElements; I++) {
+      Value *Float2 = Constant::getNullValue(PackType);
+      Value *InVal = CGF.Builder.CreateExtractElement(Op0, I);
+      Float2 = CGF.Builder.CreateInsertElement(Float2, InVal, (uint64_t)0);
+      Value *Res = CGF.Builder.CreateIntrinsic(
+          CGF.IntTy, Intrinsic::spv_packhalf2x16, ArrayRef<Value *>{Float2});
+      Result = CGF.Builder.CreateInsertElement(Result, Res, I);
+    }
+    return Result;
+  }
+
+  llvm_unreachable("Intrinsic F32ToF16 not supported by target architecture");
+}
+
 static Value *emitBufferStride(CodeGenFunction *CGF, const Expr *HandleExpr,
                                LValue &Stride) {
   // Figure out the stride of the buffer elements from the handle type.
@@ -712,6 +765,9 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
   case Builtin::BI__builtin_hlsl_elementwise_f16tof32: {
     return handleElementwiseF16ToF32(*this, E);
   }
+  case Builtin::BI__builtin_hlsl_elementwise_f32tof16: {
+    return handleElementwiseF32ToF16(*this, E);
+  }
   case Builtin::BI__builtin_hlsl_elementwise_frac: {
     Value *Op0 = EmitScalarExpr(E->getArg(0));
     if (!E->getArg(0)->getType()->hasFloatingRepresentation())

diff  --git a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h
index e9a41b94d6c03..a994645fdc01e 100644
--- a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h
+++ b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h
@@ -1105,6 +1105,27 @@ float3 f16tof32(uint3);
 _HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_f16tof32)
 float4 f16tof32(uint4);
 
+//===----------------------------------------------------------------------===//
+// f32tof16 builtins
+//===----------------------------------------------------------------------===//
+
+/// \fn uint f32tof16(float x)
+/// \brief Returns the float arg value converted to half in the low 16 bits of
+/// the uint return value
+/// \param x The float to be converted to half.
+///
+/// The return value is a uint containing the converted half value in the low
+/// 16 bits.
+
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_f32tof16)
+uint f32tof16(float);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_f32tof16)
+uint2 f32tof16(float2);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_f32tof16)
+uint3 f32tof16(float3);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_f32tof16)
+uint4 f32tof16(float4);
+
 //===----------------------------------------------------------------------===//
 // firstbitlow builtins
 //===----------------------------------------------------------------------===//

diff  --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index 4d31e26d56e6b..1d8e035cd9713 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -2998,6 +2998,20 @@ static bool CheckAllArgTypesAreCorrect(
   return false;
 }
 
+static bool CheckFloatRepresentation(Sema *S, SourceLocation Loc,
+                                     int ArgOrdinal,
+                                     clang::QualType PassedType) {
+  clang::QualType BaseType =
+      PassedType->isVectorType()
+          ? PassedType->castAs<clang::VectorType>()->getElementType()
+          : PassedType;
+  if (!BaseType->isFloat32Type())
+    return S->Diag(Loc, diag::err_builtin_invalid_arg_type)
+           << ArgOrdinal << /* scalar or vector of */ 5 << /* no int */ 0
+           << /* float */ 1 << PassedType;
+  return false;
+}
+
 static bool CheckFloatOrHalfRepresentation(Sema *S, SourceLocation Loc,
                                            int ArgOrdinal,
                                            clang::QualType PassedType) {
@@ -3716,6 +3730,15 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
     SetElementTypeAsReturnType(&SemaRef, TheCall, getASTContext().FloatTy);
     break;
   }
+  case Builtin::BI__builtin_hlsl_elementwise_f32tof16: {
+    if (SemaRef.checkArgCount(TheCall, 1))
+      return true;
+    if (CheckAllArgTypesAreCorrect(&SemaRef, TheCall, CheckFloatRepresentation))
+      return true;
+    SetElementTypeAsReturnType(&SemaRef, TheCall,
+                               getASTContext().UnsignedIntTy);
+    break;
+  }
   }
   return false;
 }

diff  --git a/clang/test/CodeGenHLSL/builtins/f32tof16-builtin.hlsl b/clang/test/CodeGenHLSL/builtins/f32tof16-builtin.hlsl
new file mode 100644
index 0000000000000..ede6d5c0f3236
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/f32tof16-builtin.hlsl
@@ -0,0 +1,27 @@
+// RUN: %clang_cc1 -std=hlsl202x -finclude-default-header -x hlsl -triple \
+// RUN:   dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
+// RUN:   -o - | FileCheck %s
+
+// CHECK: define hidden noundef i32 @_Z11test_scalarf(float noundef nofpclass(nan inf) %p0) #0 {
+// CHECK: %hlsl.f32tof16 = call i32 @llvm.dx.legacyf32tof16.f32(float %0)
+// CHECK: ret i32 %hlsl.f32tof16
+// CHECK: declare i32 @llvm.dx.legacyf32tof16.f32(float) #1
+uint test_scalar(float p0) { return __builtin_hlsl_elementwise_f32tof16(p0); }
+
+// CHECK: define hidden noundef <2 x i32> @_Z10test_uint2Dv2_f(<2 x float> noundef nofpclass(nan inf) %p0) #0 {
+// CHECK: %hlsl.f32tof16 = call <2 x i32> @llvm.dx.legacyf32tof16.v2f32(<2 x float> %0)
+// CHECK: ret <2 x i32> %hlsl.f32tof16
+// CHECK: declare <2 x i32> @llvm.dx.legacyf32tof16.v2f32(<2 x float>) #1
+uint2 test_uint2(float2 p0) { return __builtin_hlsl_elementwise_f32tof16(p0); }
+
+// CHECK: define hidden noundef <3 x i32> @_Z10test_uint3Dv3_f(<3 x float> noundef nofpclass(nan inf) %p0) #0 {
+// CHECK: %hlsl.f32tof16 = call <3 x i32> @llvm.dx.legacyf32tof16.v3f32(<3 x float> %0)
+// CHECK: ret <3 x i32> %hlsl.f32tof16
+// CHECK: declare <3 x i32> @llvm.dx.legacyf32tof16.v3f32(<3 x float>) #1
+uint3 test_uint3(float3 p0) { return __builtin_hlsl_elementwise_f32tof16(p0); }
+
+// CHECK: define hidden noundef <4 x i32> @_Z10test_uint4Dv4_f(<4 x float> noundef nofpclass(nan inf) %p0) #0 {
+// CHECK: %hlsl.f32tof16 = call <4 x i32> @llvm.dx.legacyf32tof16.v4f32(<4 x float> %0)
+// CHECK: ret <4 x i32> %hlsl.f32tof16
+// CHECK: declare <4 x i32> @llvm.dx.legacyf32tof16.v4f32(<4 x float>) #1
+uint4 test_uint4(float4 p0) { return __builtin_hlsl_elementwise_f32tof16(p0); }

diff  --git a/clang/test/CodeGenHLSL/builtins/f32tof16.hlsl b/clang/test/CodeGenHLSL/builtins/f32tof16.hlsl
new file mode 100644
index 0000000000000..008f495ef869c
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/f32tof16.hlsl
@@ -0,0 +1,27 @@
+// RUN: %clang_cc1 -std=hlsl202x -finclude-default-header -x hlsl -triple \
+// RUN:   dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
+// RUN:   -o - | FileCheck %s
+
+// CHECK: define hidden noundef i32 @_Z11test_scalarf(float noundef nofpclass(nan inf) %p0) #0 {
+// CHECK: %hlsl.f32tof16 = call i32 @llvm.dx.legacyf32tof16.f32(float %0)
+// CHECK: ret i32 %hlsl.f32tof16
+// CHECK: declare i32 @llvm.dx.legacyf32tof16.f32(float) #1
+uint test_scalar(float p0) { return f32tof16(p0); }
+
+// CHECK: define hidden noundef <2 x i32> @_Z10test_uint2Dv2_f(<2 x float> noundef nofpclass(nan inf) %p0) #0 {
+// CHECK: %hlsl.f32tof16 = call <2 x i32> @llvm.dx.legacyf32tof16.v2f32(<2 x float> %0)
+// CHECK: ret <2 x i32> %hlsl.f32tof16
+// CHECK: declare <2 x i32> @llvm.dx.legacyf32tof16.v2f32(<2 x float>) #1
+uint2 test_uint2(float2 p0) { return f32tof16(p0); }
+
+// CHECK: define hidden noundef <3 x i32> @_Z10test_uint3Dv3_f(<3 x float> noundef nofpclass(nan inf) %p0) #0 {
+// CHECK: %hlsl.f32tof16 = call <3 x i32> @llvm.dx.legacyf32tof16.v3f32(<3 x float> %0)
+// CHECK: ret <3 x i32> %hlsl.f32tof16
+// CHECK: declare <3 x i32> @llvm.dx.legacyf32tof16.v3f32(<3 x float>) #1
+uint3 test_uint3(float3 p0) { return f32tof16(p0); }
+
+// CHECK: define hidden noundef <4 x i32> @_Z10test_uint4Dv4_f(<4 x float> noundef nofpclass(nan inf) %p0) #0 {
+// CHECK: %hlsl.f32tof16 = call <4 x i32> @llvm.dx.legacyf32tof16.v4f32(<4 x float> %0)
+// CHECK: ret <4 x i32> %hlsl.f32tof16
+// CHECK: declare <4 x i32> @llvm.dx.legacyf32tof16.v4f32(<4 x float>) #1
+uint4 test_uint4(float4 p0) { return f32tof16(p0); }

diff  --git a/clang/test/SemaHLSL/BuiltIns/f32tof16-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/f32tof16-errors.hlsl
new file mode 100644
index 0000000000000..cd95602b413c5
--- /dev/null
+++ b/clang/test/SemaHLSL/BuiltIns/f32tof16-errors.hlsl
@@ -0,0 +1,134 @@
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple dxil-pc-shadermodel6.6-library %s -fnative-int16-type -emit-llvm-only -disable-llvm-passes -verify
+
+uint builtin_f32tof16_too_few_arg() {
+  return __builtin_hlsl_elementwise_f32tof16();
+  // expected-error@-1 {{too few arguments to function call, expected 1, have 0}}
+  // expected-note@hlsl/hlsl_alias_intrinsics.h:* 4 {{candidate function not viable: requires 1 argument, but 0 were provided}}
+}
+
+uint builtin_f32tof16_too_many_arg(uint p0) {
+  return __builtin_hlsl_elementwise_f32tof16(p0, p0);
+  // expected-error@-1 {{too many arguments to function call, expected 1, have 2}}
+  // expected-note@hlsl/hlsl_alias_intrinsics.h:* 4 {{candidate function not viable: requires 1 argument, but 2 were provided}}
+}
+
+uint builtin_f32tof16_bool(bool p0) {
+  return __builtin_hlsl_elementwise_f32tof16(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'bool')}}
+}
+
+uint builtin_f32tof16_bool4(bool4 p0) {
+  return __builtin_hlsl_elementwise_f32tof16(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'bool4' (aka 'vector<bool, 4>')}}
+}
+
+uint builtin_f32tof16_short(short p0) {
+  return __builtin_hlsl_elementwise_f32tof16(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'short')}}
+}
+
+uint builtin_f32tof16_unsigned_short(unsigned short p0) {
+  return __builtin_hlsl_elementwise_f32tof16(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'unsigned short')}}
+}
+
+uint builtin_f32tof16_int(int p0) {
+  return __builtin_hlsl_elementwise_f32tof16(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'int')}}
+}
+
+uint builtin_f32tof16_int64_t(long p0) {
+  return __builtin_hlsl_elementwise_f32tof16(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'long')}}
+}
+
+uint2 builtin_f32tof16_int2_to_float2_promotion(int2 p0) {
+  return __builtin_hlsl_elementwise_f32tof16(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'int2' (aka 'vector<int, 2>'))}}
+}
+
+uint builtin_f32tof16_half(half p0) {
+  return __builtin_hlsl_elementwise_f32tof16(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'half')}}
+}
+
+uint builtin_f32tof16_half4(half4 p0) {
+  return __builtin_hlsl_elementwise_f32tof16(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'half4' (aka 'vector<half, 4>'))}}
+}
+
+uint builtin_f32tof16_float(unsigned int p0) {
+  return __builtin_hlsl_elementwise_f32tof16(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'unsigned int')}}
+}
+
+uint builtin_f32tof16_double(double p0) {
+  return __builtin_hlsl_elementwise_f32tof16(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'double')}}
+}
+
+uint f32tof16_too_few_arg() {
+  return f32tof16();
+  // expected-error@-1 {{no matching function for call to 'f32tof16'}}
+}
+
+uint f32tof16_too_many_arg(uint p0) {
+  return f32tof16(p0, p0);
+  // expected-error@-1 {{no matching function for call to 'f32tof16'}}
+}
+
+uint f32tof16_bool(bool p0) {
+  return f32tof16(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'bool')}}
+}
+
+uint f32tof16_bool3(bool3 p0) {
+  return f32tof16(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'bool3' (aka 'vector<bool, 3>'))}}
+}
+
+
+uint f32tof16_int16_t(short p0) {
+  return f32tof16(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'short')}}
+}
+
+uint f32tof16_int16_t(unsigned short p0) {
+  return f32tof16(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'unsigned short')}}
+}
+
+uint f32tof16_int(int p0) {
+  return f32tof16(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'int')}}
+}
+
+uint f32tof16_int64_t(long p0) {
+  return f32tof16(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'long')}}
+}
+
+uint2 f32tof16_int2_to_float2_promotion(int3 p0) {
+  return f32tof16(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'int3' (aka 'vector<int, 3>'))}}
+}
+
+uint f32tof16_half(half p0) {
+  return f32tof16(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'half')}}
+}
+
+uint f32tof16_half2(half2 p0) {
+  return f32tof16(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'half2' (aka 'vector<half, 2>'))}}
+}
+
+uint f32tof16_float(uint p0) {
+  return f32tof16(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'uint' (aka 'unsigned int'))}}
+}
+
+uint f32tof16_double(double p0) {
+  return f32tof16(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'double')}}
+}

diff  --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td
index 23627848b6214..6b07ce8b30705 100644
--- a/llvm/include/llvm/IR/IntrinsicsDirectX.td
+++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -154,6 +154,9 @@ def int_dx_isnan : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i1
 def int_dx_legacyf16tof32 : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_float_ty>],
     [llvm_anyint_ty], [IntrNoMem]>;
 
+def int_dx_legacyf32tof16 : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>],
+    [llvm_anyfloat_ty], [IntrNoMem]>;
+
 def int_dx_lerp : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, LLVMMatchType<0>,LLVMMatchType<0>],
     [IntrNoMem]>;
 

diff  --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
index a93e8ad0ce964..6124ce594d1ea 100644
--- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td
+++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
@@ -225,5 +225,7 @@ def int_spv_rsqrt : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty]
        [IntrNoMem, NoUndef<RetIndex>]>;
 
   def int_spv_unpackhalf2x16 : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [llvm_i32_ty], [IntrNoMem]>;
+  def int_spv_packhalf2x16 : DefaultAttrsIntrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
+
 
 }

diff  --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index 3a40d2c36139d..01685dd0e22ec 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -1124,8 +1124,17 @@ def WaveActiveOp : DXILOp<119, waveActiveOp> {
   let attributes = [Attributes<DXIL1_0, []>];
 }
 
+def LegacyF32ToF16 : DXILOp<130, legacyF32ToF16> {
+  let Doc = "converts the float stored in the first element of the float2 "
+            "to a half and stores it in the low-half of the output uint";
+  let intrinsics = [IntrinSelect<int_dx_legacyf32tof16>];
+  let arguments = [FloatTy];
+  let result = Int32Ty;
+  let stages = [Stages<DXIL1_0, [all_stages]>];
+}
+
 def LegacyF16ToF32 : DXILOp<131, legacyF16ToF32> {
-  let Doc = "returns the float16 stored in the low-half of the uint converted "
+  let Doc = "returns the half stored in the low-half of the uint converted "
             "to a float";
   let intrinsics = [IntrinSelect<int_dx_legacyf16tof32>];
   let arguments = [Int32Ty];

diff  --git a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp
index f54b48b91265e..23f45b5fe2270 100644
--- a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp
+++ b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp
@@ -35,6 +35,7 @@ bool DirectXTTIImpl::isTargetIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID,
   case Intrinsic::dx_isinf:
   case Intrinsic::dx_isnan:
   case Intrinsic::dx_legacyf16tof32:
+  case Intrinsic::dx_legacyf32tof16:
     return OpdIdx == 0;
   default:
     return OpdIdx == -1;
@@ -52,6 +53,7 @@ bool DirectXTTIImpl::isTargetIntrinsicTriviallyScalarizable(
   case Intrinsic::dx_isinf:
   case Intrinsic::dx_isnan:
   case Intrinsic::dx_legacyf16tof32:
+  case Intrinsic::dx_legacyf32tof16:
   case Intrinsic::dx_rsqrt:
   case Intrinsic::dx_saturate:
   case Intrinsic::dx_splitdouble:

diff  --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index fbc37e968b289..915db6824d7c6 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -3929,6 +3929,9 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg,
   case Intrinsic::spv_unpackhalf2x16: {
     return selectExtInst(ResVReg, ResType, I, GL::UnpackHalf2x16);
   }
+  case Intrinsic::spv_packhalf2x16: {
+    return selectExtInst(ResVReg, ResType, I, GL::PackHalf2x16);
+  }
   case Intrinsic::spv_ddx:
     return selectDerivativeInst(ResVReg, ResType, I, SPIRV::OpDPdx);
   case Intrinsic::spv_ddy:

diff  --git a/llvm/test/CodeGen/DirectX/f32tof16.ll b/llvm/test/CodeGen/DirectX/f32tof16.ll
new file mode 100644
index 0000000000000..b48ef24c2b08a
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/f32tof16.ll
@@ -0,0 +1,57 @@
+; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.9-library %s | FileCheck %s
+
+define hidden noundef i32 @_Z11test_scalarj(float noundef %p0) local_unnamed_addr #0 {
+entry:
+  ; CHECK : [[UINT:%.*]] = call i32 @dx.op.legacyF32ToF16(i32 130, float %p0)
+  ; CHECK : ret i32 [[UINT]]
+  %hlsl.f32tof16 = tail call i32 @llvm.dx.legacyf32tof16.i32(float %p0)
+  ret i32 %hlsl.f32tof16
+}
+
+define hidden noundef <2 x i32> @_Z10test_uint2Dv2_j(<2 x float> noundef %p0) local_unnamed_addr #0 {
+entry:
+  ; CHECK: [[FLOAT2_0:%.*]] = extractelement <2 x float> %p0, i64 0
+  ; CHECK: [[UINT_0:%.*]] = call i32 @dx.op.legacyF32ToF16(i32 130, float [[FLOAT2_0]])
+  ; CHECK: [[FLOAT2_1:%.*]] = extractelement <2 x float> %p0, i64 1
+  ; CHECK: [[UINT_1:%.*]] = call i32 @dx.op.legacyF32ToF16(i32 130, float [[FLOAT2_1]])
+  ; CHECK: [[UINT2_0:%.*]] = insertelement <2 x i32> poison, i32 [[UINT_0]], i64 0
+  ; CHECK: [[UINT2_1:%.*]] = insertelement <2 x i32> [[UINT2_0]], i32 [[UINT_1]], i64 1
+  ; CHECK : ret <2 x i32>  [[UINT2_1]]
+  %hlsl.f32tof16 = tail call <2 x i32> @llvm.dx.legacyf32tof16.v2i32(<2 x float> %p0)
+  ret <2 x i32> %hlsl.f32tof16
+}
+
+define hidden noundef <3 x i32> @_Z10test_uint3Dv3_j(<3 x float> noundef %p0) local_unnamed_addr #0 {
+entry:
+  ; CHECK: [[FLOAT3_0:%.*]] = extractelement <3 x float> %p0, i64 0
+  ; CHECK: [[UINT_0:%.*]] = call i32 @dx.op.legacyF32ToF16(i32 130, float [[FLOAT3_0]])
+  ; CHECK: [[FLOAT3_1:%.*]] = extractelement <3 x float> %p0, i64 1
+  ; CHECK: [[UINT_1:%.*]] = call i32 @dx.op.legacyF32ToF16(i32 130, float [[FLOAT3_1]])
+  ; CHECK: [[FLOAT3_2:%.*]] = extractelement <3 x float> %p0, i64 2
+  ; CHECK: [[UINT_2:%.*]] = call i32 @dx.op.legacyF32ToF16(i32 130, float [[FLOAT3_2]])
+  ; CHECK: [[UINT3_0:%.*]] = insertelement <3 x i32> poison, i32 [[UINT_0]], i64 0
+  ; CHECK: [[UINT3_1:%.*]] = insertelement <3 x i32> [[UINT3_0]], i32 [[UINT_1]], i64 1
+  ; CHECK: [[UINT3_2:%.*]] = insertelement <3 x i32> [[UINT3_1]], i32 [[UINT_2]], i64 2
+  ; CHECK : ret <3 x i32>  [[UINT3_2]]
+  %hlsl.f32tof16 = tail call <3 x i32> @llvm.dx.legacyf32tof16.v3f32(<3 x float> %p0)
+  ret <3 x i32> %hlsl.f32tof16
+}
+
+define hidden noundef <4 x i32> @_Z10test_uint4Dv4_j(<4 x float> noundef %p0) local_unnamed_addr #0 {
+entry:
+  ; CHECK: [[FLOAT4_0:%.*]] = extractelement <4 x float> %p0, i64 0
+  ; CHECK: [[UINT_0:%.*]] = call i32 @dx.op.legacyF32ToF16(i32 130, float [[FLOAT4_0]])
+  ; CHECK: [[FLOAT4_1:%.*]] = extractelement <4 x float> %p0, i64 1
+  ; CHECK: [[UINT_1:%.*]] = call i32 @dx.op.legacyF32ToF16(i32 130, float [[FLOAT4_1]])
+  ; CHECK: [[FLOAT4_2:%.*]] = extractelement <4 x float> %p0, i64 2
+  ; CHECK: [[UINT_2:%.*]] = call i32 @dx.op.legacyF32ToF16(i32 130, float [[FLOAT4_2]])
+  ; CHECK: [[FLOAT4_3:%.*]] = extractelement <4 x float> %p0, i64 3
+  ; CHECK: [[UINT_3:%.*]] = call i32 @dx.op.legacyF32ToF16(i32 130, float [[FLOAT4_3]])
+  ; CHECK: [[UINT4_0:%.*]] = insertelement <4 x i32> poison, i32 [[UINT_0]], i64 0
+  ; CHECK: [[UINT4_1:%.*]] = insertelement <4 x i32> [[UINT4_0]], i32 [[UINT_1]], i64 1
+  ; CHECK: [[UINT4_2:%.*]] = insertelement <4 x i32> [[UINT4_1]], i32 [[UINT_2]], i64 2
+  ; CHECK: [[UINT4_3:%.*]] = insertelement <4 x i32> [[UINT4_2]], i32 [[UINT_3]], i64 3
+  ; CHECK : ret <4 x i32>  [[UINT4_3]]
+  %hlsl.f32tof16 = tail call <4 x i32> @llvm.dx.legacyf32tof16.v4i32(<4 x float> %p0)
+  ret <4 x i32> %hlsl.f32tof16
+}

diff  --git a/llvm/test/CodeGen/SPIRV/opencl/packhalf2x16-error.ll b/llvm/test/CodeGen/SPIRV/opencl/packhalf2x16-error.ll
new file mode 100644
index 0000000000000..371d51d68e8b8
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/opencl/packhalf2x16-error.ll
@@ -0,0 +1,10 @@
+; RUN: not llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown %s -o /dev/null 2>&1 | FileCheck %s
+; RUN: not llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown %s -o /dev/null 2>&1 | FileCheck %s
+
+; CHECK: LLVM ERROR: %6:id(s64) = G_INTRINSIC intrinsic(@llvm.spv.packhalf2x16), %0:vfid(<2 x s64>) is only supported with the GLSL extended instruction set.
+
+define hidden spir_func noundef i32 @_Z9test_funcj(<2 x float> noundef %0) local_unnamed_addr #0 {
+  %2 = tail call i32 @llvm.spv.packhalf2x16.i32(<2 x float> %0)
+  ret i32 %2
+}
+

diff  --git a/llvm/test/CodeGen/SPIRV/packhalf2x16.ll b/llvm/test/CodeGen/SPIRV/packhalf2x16.ll
new file mode 100644
index 0000000000000..570d38e6bb7af
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/packhalf2x16.ll
@@ -0,0 +1,15 @@
+; RUN: llc -O0 -verify-machineinstrs -mtriple=spirv-unknown-vulkan %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan %s -o - -filetype=obj | spirv-val %}
+
+; CHECK-DAG: [[SET:%.*]] = OpExtInstImport "GLSL.std.450"
+; CHECK-DAG: [[FLOAT:%.*]] = OpTypeFloat 32
+; CHECK-DAG: [[FLOAT2:%.*]] = OpTypeVector [[FLOAT]] 2
+; CHECK-DAG: [[UINT:%.*]] = OpTypeInt 32 0
+
+; CHECK: [[P0:%.*]] = OpFunctionParameter [[FLOAT2]]
+; CHECK: [[PACK:%.*]] = OpExtInst [[UINT]] [[SET]] PackHalf2x16 [[P0]]
+; CHECK: OpReturnValue [[PACK]]
+define hidden spir_func noundef i32 @_Z9test_funcj(<2 x float> noundef %0) local_unnamed_addr #0 {
+  %2 = tail call i32 @llvm.spv.packhalf2x16.v2f32(<2 x float> %0)
+  ret i32 %2
+}