[clang] [llvm] [HLSL] Implement the f16tof32() intrinsic (PR #165860)

Fri Oct 31 06:29:33 PDT 2025

https://github.com/tcorringham created https://github.com/llvm/llvm-project/pull/165860

Implement the f16tof32() intrinsic, including DXIL and SPIRV codegen, and associated tests.

Fixes #99112

>From d8706d621ecd2f84ba645152d3ab015ce2d73553 Mon Sep 17 00:00:00 2001
From: Tim Corringham <tcorring at amd.com>
Date: Wed, 1 Oct 2025 17:48:36 +0100
Subject: [PATCH] [HLSL] Implement the f16tof32() intrinsic

Implement the f16tof32() intrinsic, including DXIL and SPIRV codegen,
and associated tests.
---
 clang/include/clang/Basic/Builtins.td         |   6 +
 clang/lib/CodeGen/CGHLSLBuiltins.cpp          |  16 +++
 clang/lib/CodeGen/CGHLSLRuntime.h             |   1 +
 .../lib/Headers/hlsl/hlsl_alias_intrinsics.h  |  21 +++
 clang/lib/Sema/SemaHLSL.cpp                   |  57 ++++++--
 .../builtins/f16tof32-builtin.hlsl            |  30 ++++
 clang/test/CodeGenHLSL/builtins/f16tof32.hlsl |  30 ++++
 .../SemaHLSL/BuiltIns/f16tof32-errors.hlsl    | 134 ++++++++++++++++++
 llvm/include/llvm/IR/IntrinsicsDirectX.td     |   3 +
 llvm/include/llvm/IR/IntrinsicsSPIRV.td       |   4 +
 llvm/lib/Target/DirectX/DXIL.td               |   9 ++
 .../DirectX/DirectXTargetTransformInfo.cpp    |   8 +-
 .../Target/SPIRV/SPIRVInstructionSelector.cpp |  90 ++++++++++++
 llvm/test/CodeGen/DirectX/f16tof32.ll         |  57 ++++++++
 .../CodeGen/SPIRV/hlsl-intrinsics/f16tof32.ll |  76 ++++++++++
 15 files changed, 527 insertions(+), 15 deletions(-)
 create mode 100644 clang/test/CodeGenHLSL/builtins/f16tof32-builtin.hlsl
 create mode 100644 clang/test/CodeGenHLSL/builtins/f16tof32.hlsl
 create mode 100644 clang/test/SemaHLSL/BuiltIns/f16tof32-errors.hlsl
 create mode 100644 llvm/test/CodeGen/DirectX/f16tof32.ll
 create mode 100644 llvm/test/CodeGen/SPIRV/hlsl-intrinsics/f16tof32.ll

diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index a350acdf146ab..8bdff10d57745 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -5222,6 +5222,12 @@ def HLSLGetSpirvSpecConstant : LangBuiltin<"HLSL_LANG">, HLSLScalarTemplate {
   let Prototype = "T(unsigned int, T)";
 }
 
+def HLSLF16ToF32 : LangBuiltin<"HLSL_LANG"> {
+  let Spellings = ["__builtin_hlsl_elementwise_f16tof32"];
+  let Attributes = [NoThrow, Const, CustomTypeChecking];
+  let Prototype = "void(...)";
+}
+
 // Builtins for XRay.
 def XRayCustomEvent : Builtin {
   let Spellings = ["__xray_customevent"];
diff --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
index 384bd59e7533a..d5aebd6d64817 100644
--- a/clang/lib/CodeGen/CGHLSLBuiltins.cpp
+++ b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
@@ -560,6 +560,22 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
         /*ReturnType=*/X->getType(), CGM.getHLSLRuntime().getDegreesIntrinsic(),
         ArrayRef<Value *>{X}, nullptr, "hlsl.degrees");
   }
+  case Builtin::BI__builtin_hlsl_elementwise_f16tof32: {
+    Value *Op0 = EmitScalarExpr(E->getArg(0));
+    llvm::Type *Xty = Op0->getType();
+    llvm::Type *retType = llvm::Type::getFloatTy(this->getLLVMContext());
+    if (Xty->isVectorTy()) {
+      auto *XVecTy = E->getArg(0)->getType()->castAs<VectorType>();
+      retType = llvm::VectorType::get(
+          retType, ElementCount::getFixed(XVecTy->getNumElements()));
+    }
+    if (!E->getArg(0)->getType()->hasUnsignedIntegerRepresentation())
+      llvm_unreachable(
+          "f16tof32 operand must have an unsigned int representation");
+    return Builder.CreateIntrinsic(
+        retType, CGM.getHLSLRuntime().getLegacyF16ToF32Intrinsic(),
+        ArrayRef<Value *>{Op0}, nullptr, "hlsl.f16tof32");
+  }
   case Builtin::BI__builtin_hlsl_elementwise_frac: {
     Value *Op0 = EmitScalarExpr(E->getArg(0));
     if (!E->getArg(0)->getType()->hasFloatingRepresentation())
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h
index 103b4a98f6c26..e36e89fe16125 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.h
+++ b/clang/lib/CodeGen/CGHLSLRuntime.h
@@ -96,6 +96,7 @@ class CGHLSLRuntime {
                                    flattened_thread_id_in_group)
   GENERATE_HLSL_INTRINSIC_FUNCTION(IsInf, isinf)
   GENERATE_HLSL_INTRINSIC_FUNCTION(IsNaN, isnan)
+  GENERATE_HLSL_INTRINSIC_FUNCTION(LegacyF16ToF32, legacyf16tof32)
   GENERATE_HLSL_INTRINSIC_FUNCTION(Lerp, lerp)
   GENERATE_HLSL_INTRINSIC_FUNCTION(Normalize, normalize)
   GENERATE_HLSL_INTRINSIC_FUNCTION(Rsqrt, rsqrt)
diff --git a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h
index d973371312701..a85accdcc4549 100644
--- a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h
+++ b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h
@@ -1052,6 +1052,27 @@ float3 exp2(float3);
 _HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp2)
 float4 exp2(float4);
 
+//===----------------------------------------------------------------------===//
+// f16tof32 builtins
+//===----------------------------------------------------------------------===//
+
+/// \fn float f16tof32(uint x)
+/// \brief Returns the half value stored in the low 16 bits of the uint arg
+/// converted to a float.
+/// \param x The uint containing two half values.
+///
+/// The float value of the half value found in the low 16 bits of the \a x
+/// parameter.
+
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_f16tof32)
+float f16tof32(uint);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_f16tof32)
+float2 f16tof32(uint2);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_f16tof32)
+float3 f16tof32(uint3);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_f16tof32)
+float4 f16tof32(uint4);
+
 //===----------------------------------------------------------------------===//
 // firstbithigh builtins
 //===----------------------------------------------------------------------===//
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index 5b3e89f936327..be2a85442c9b6 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -2738,6 +2738,23 @@ static bool CheckUnsignedIntRepresentation(Sema *S, SourceLocation Loc,
   return false;
 }
 
+static bool CheckExpectedBitWidth(Sema *S, CallExpr *TheCall,
+                                  unsigned ArgOrdinal, unsigned Width) {
+  QualType ArgTy = TheCall->getArg(ArgOrdinal)->getType();
+  if (auto *VTy = ArgTy->getAs<VectorType>())
+    ArgTy = VTy->getElementType();
+  // ensure the (element) type of arg ArgOrdinal has the expected bit width
+  uint64_t ElementBitCount =
+      S->getASTContext().getTypeSizeInChars(ArgTy).getQuantity() * 8;
+  if (ElementBitCount != Width) {
+    S->Diag(TheCall->getArg(ArgOrdinal)->getBeginLoc(),
+            diag::err_integer_incorrect_bit_count)
+        << Width << ElementBitCount;
+    return true;
+  }
+  return false;
+}
+
 static void SetElementTypeAsReturnType(Sema *S, CallExpr *TheCall,
                                        QualType ReturnType) {
   auto *VecTyA = TheCall->getArg(0)->getType()->getAs<VectorType>();
@@ -2897,24 +2914,16 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
                                    CheckUnsignedIntVecRepresentation))
       return true;
 
-    auto *VTy = TheCall->getArg(0)->getType()->getAs<VectorType>();
     // ensure arg integers are 32-bits
-    uint64_t ElementBitCount = getASTContext()
-                                   .getTypeSizeInChars(VTy->getElementType())
-                                   .getQuantity() *
-                               8;
-    if (ElementBitCount != 32) {
-      SemaRef.Diag(TheCall->getBeginLoc(),
-                   diag::err_integer_incorrect_bit_count)
-          << 32 << ElementBitCount;
+    if (CheckExpectedBitWidth(&SemaRef, TheCall, 0, 32))
       return true;
-    }
 
     // ensure both args are vectors of total bit size of a multiple of 64
+    auto *VTy = TheCall->getArg(0)->getType()->getAs<VectorType>();
     int NumElementsArg = VTy->getNumElements();
     if (NumElementsArg != 2 && NumElementsArg != 4) {
       SemaRef.Diag(TheCall->getBeginLoc(), diag::err_vector_incorrect_bit_count)
-          << 1 /*a multiple of*/ << 64 << NumElementsArg * ElementBitCount;
+          << 1 /*a multiple of*/ << 64 << NumElementsArg * 32;
       return true;
     }
 
@@ -3230,7 +3239,7 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
     break;
   }
   // Note these are llvm builtins that we want to catch invalid intrinsic
-  // generation. Normal handling of these builitns will occur elsewhere.
+  // generation. Normal handling of these builtins will occur elsewhere.
   case Builtin::BI__builtin_elementwise_bitreverse: {
     // does not include a check for number of arguments
     // because that is done previously
@@ -3340,6 +3349,30 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
     }
     break;
   }
+  case Builtin::BI__builtin_hlsl_elementwise_f16tof32: {
+    if (SemaRef.checkArgCount(TheCall, 1))
+      return true;
+    if (CheckAllArgTypesAreCorrect(&SemaRef, TheCall,
+                                   CheckUnsignedIntRepresentation))
+      return true;
+    // ensure arg integers are 32 bits
+    if (CheckExpectedBitWidth(&SemaRef, TheCall, 0, 32))
+      return true;
+    // check it wasn't a bool type
+    QualType ArgTy = TheCall->getArg(0)->getType();
+    if (auto *VTy = ArgTy->getAs<VectorType>())
+      ArgTy = VTy->getElementType();
+    if (ArgTy->isBooleanType()) {
+      SemaRef.Diag(TheCall->getArg(0)->getBeginLoc(),
+                   diag::err_builtin_invalid_arg_type)
+          << 1 << /* scalar or vector of */ 5 << /* unsigned int */ 3
+          << /* no fp */ 0 << TheCall->getArg(0)->getType();
+      return true;
+    }
+
+    SetElementTypeAsReturnType(&SemaRef, TheCall, getASTContext().FloatTy);
+    break;
+  }
   }
   return false;
 }
diff --git a/clang/test/CodeGenHLSL/builtins/f16tof32-builtin.hlsl b/clang/test/CodeGenHLSL/builtins/f16tof32-builtin.hlsl
new file mode 100644
index 0000000000000..65dba664bb5ea
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/f16tof32-builtin.hlsl
@@ -0,0 +1,30 @@
+// RUN: %clang_cc1 -std=hlsl202x -finclude-default-header -x hlsl -triple \
+// RUN:   dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
+// RUN:   -o - | FileCheck %s
+
+// CHECK: define hidden noundef nofpclass(nan inf) float
+// CHECK: %hlsl.f16tof32 = call reassoc nnan ninf nsz arcp afn float @llvm.dx.legacyf16tof32.i32(i32 %0)
+// CHECK: ret float %hlsl.f16tof32
+// CHECK: declare float @llvm.dx.legacyf16tof32.i32(i32)
+float test_scalar(uint p0) { return __builtin_hlsl_elementwise_f16tof32(p0); }
+
+// CHECK: define hidden noundef nofpclass(nan inf) <2 x float>
+// CHECK: %hlsl.f16tof32 = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.dx.legacyf16tof32.v2i32(<2 x i32> %0)
+// CHECK: ret <2 x float> %hlsl.f16tof32
+// CHECK: declare <2 x float> @llvm.dx.legacyf16tof32.v2i32(<2 x i32>)
+float2 test_uint2(uint2 p0) { return __builtin_hlsl_elementwise_f16tof32(p0); }
+
+// CHECK: define hidden noundef nofpclass(nan inf) <3 x float> @_Z10test_uint3Dv3_j(<3 x i32> noundef %p0) #0 {
+// CHECK: %hlsl.f16tof32 = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.dx.legacyf16tof32.v3i32(<3 x i32> %0)
+// CHECK: ret <3 x float> %hlsl.f16tof32
+// CHECK: declare <3 x float> @llvm.dx.legacyf16tof32.v3i32(<3 x i32>)
+float3 test_uint3(uint3 p0) { return __builtin_hlsl_elementwise_f16tof32(p0); }
+
+// CHECK: define hidden noundef nofpclass(nan inf) <4 x float> @_Z10test_uint4Dv4_j(<4 x i32> noundef %p0) #0 {
+// CHECK: %hlsl.f16tof32 = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.dx.legacyf16tof32.v4i32(<4 x i32> %0)
+// CHECK: ret <4 x float> %hlsl.f16tof32
+// CHECK: declare <4 x float> @llvm.dx.legacyf16tof32.v4i32(<4 x i32>)
+float4 test_uint4(uint4 p0) { return __builtin_hlsl_elementwise_f16tof32(p0); }
+
+
+
diff --git a/clang/test/CodeGenHLSL/builtins/f16tof32.hlsl b/clang/test/CodeGenHLSL/builtins/f16tof32.hlsl
new file mode 100644
index 0000000000000..b68bc197f16c5
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/f16tof32.hlsl
@@ -0,0 +1,30 @@
+// RUN: %clang_cc1 -std=hlsl202x -finclude-default-header -x hlsl -triple \
+// RUN:   dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
+// RUN:   -o - | FileCheck %s
+
+// CHECK: define hidden noundef nofpclass(nan inf) float
+// CHECK: %hlsl.f16tof32 = call reassoc nnan ninf nsz arcp afn float @llvm.dx.legacyf16tof32.i32(i32 %0)
+// CHECK: ret float %hlsl.f16tof32
+// CHECK: declare float @llvm.dx.legacyf16tof32.i32(i32)
+float test_scalar(uint p0) { return f16tof32(p0); }
+
+// CHECK: define hidden noundef nofpclass(nan inf) <2 x float>
+// CHECK: %hlsl.f16tof32 = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.dx.legacyf16tof32.v2i32(<2 x i32> %0)
+// CHECK: ret <2 x float> %hlsl.f16tof32
+// CHECK: declare <2 x float> @llvm.dx.legacyf16tof32.v2i32(<2 x i32>)
+float2 test_uint2(uint2 p0) { return f16tof32(p0); }
+
+// CHECK: define hidden noundef nofpclass(nan inf) <3 x float> @_Z10test_uint3Dv3_j(<3 x i32> noundef %p0) #0 {
+// CHECK: %hlsl.f16tof32 = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.dx.legacyf16tof32.v3i32(<3 x i32> %0)
+// CHECK: ret <3 x float> %hlsl.f16tof32
+// CHECK: declare <3 x float> @llvm.dx.legacyf16tof32.v3i32(<3 x i32>)
+float3 test_uint3(uint3 p0) { return f16tof32(p0); }
+
+// CHECK: define hidden noundef nofpclass(nan inf) <4 x float> @_Z10test_uint4Dv4_j(<4 x i32> noundef %p0) #0 {
+// CHECK: %hlsl.f16tof32 = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.dx.legacyf16tof32.v4i32(<4 x i32> %0)
+// CHECK: ret <4 x float> %hlsl.f16tof32
+// CHECK: declare <4 x float> @llvm.dx.legacyf16tof32.v4i32(<4 x i32>)
+float4 test_uint4(uint4 p0) { return f16tof32(p0); }
+
+
+
diff --git a/clang/test/SemaHLSL/BuiltIns/f16tof32-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/f16tof32-errors.hlsl
new file mode 100644
index 0000000000000..2c4baae524977
--- /dev/null
+++ b/clang/test/SemaHLSL/BuiltIns/f16tof32-errors.hlsl
@@ -0,0 +1,134 @@
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify
+
+float builtin_f16tof32_too_few_arg() {
+  return __builtin_hlsl_elementwise_f16tof32();
+  // expected-error at -1 {{too few arguments to function call, expected 1, have 0}}
+  // expected-note at hlsl/hlsl_alias_intrinsics.h:* 4 {{candidate function not viable: requires 1 argument, but 0 were provided}}
+}
+
+float builtin_f16tof32_too_many_arg(uint p0) {
+  return __builtin_hlsl_elementwise_f16tof32(p0, p0);
+  // expected-error at -1 {{too many arguments to function call, expected 1, have 2}}
+  // expected-note at hlsl/hlsl_alias_intrinsics.h:* 4 {{candidate function not viable: requires 1 argument, but 2 were provided}}
+}
+
+float builtin_f16tof32_bool(bool p0) {
+  return __builtin_hlsl_elementwise_f16tof32(p0);
+  // expected-error at -1 {{1st argument must be a scalar or vector of unsigned integer types (was 'bool')}}
+}
+
+float builtin_f16tof32_bool4(bool4 p0) {
+  return __builtin_hlsl_elementwise_f16tof32(p0);
+  // expected-error at -1 {{1st argument must be a scalar or vector of unsigned integer types (was 'bool4' (aka 'vector<bool, 4>'))}}
+}
+
+float builtin_f16tof32_int16_t(int16_t p0) {
+  return __builtin_hlsl_elementwise_f16tof32(p0);
+  // expected-error at -1 {{1st argument must be a scalar or vector of unsigned integer types (was 'int16_t' (aka 'short'))}}
+}
+
+float builtin_f16tof32_int16_t(unsigned short p0) {
+  return __builtin_hlsl_elementwise_f16tof32(p0);
+  // expected-error at -1 {{incorrect number of bits in integer (expected 32 bits, have 16)}}
+}
+
+float builtin_f16tof32_int(int p0) {
+  return __builtin_hlsl_elementwise_f16tof32(p0);
+  // expected-error at -1 {{1st argument must be a scalar or vector of unsigned integer types (was 'int')}}
+}
+
+float builtin_f16tof32_int64_t(long p0) {
+  return __builtin_hlsl_elementwise_f16tof32(p0);
+  // expected-error at -1 {{1st argument must be a scalar or vector of unsigned integer types (was 'long')}}
+}
+
+float2 builtin_f16tof32_int2_to_float2_promotion(int2 p0) {
+  return __builtin_hlsl_elementwise_f16tof32(p0);
+  // expected-error at -1 {{1st argument must be a scalar or vector of unsigned integer types (was 'int2' (aka 'vector<int, 2>'))}}
+}
+
+float builtin_f16tof32_half(half p0) {
+  return __builtin_hlsl_elementwise_f16tof32(p0);
+  // expected-error at -1 {{1st argument must be a scalar or vector of unsigned integer types (was 'half')}}
+}
+
+float builtin_f16tof32_half4(half4 p0) {
+  return __builtin_hlsl_elementwise_f16tof32(p0);
+  // expected-error at -1 {{1st argument must be a scalar or vector of unsigned integer types (was 'half4' (aka 'vector<half, 4>'))}}
+}
+
+float builtin_f16tof32_float(float p0) {
+  return __builtin_hlsl_elementwise_f16tof32(p0);
+  // expected-error at -1 {{1st argument must be a scalar or vector of unsigned integer types (was 'float')}}
+}
+
+float builtin_f16tof32_double(double p0) {
+  return __builtin_hlsl_elementwise_f16tof32(p0);
+  // expected-error at -1 {{1st argument must be a scalar or vector of unsigned integer types (was 'double')}}
+}
+
+float f16tof32_too_few_arg() {
+  return f16tof32();
+  // expected-error at -1 {{no matching function for call to 'f16tof32'}}
+}
+
+float f16tof32_too_many_arg(uint p0) {
+  return f16tof32(p0, p0);
+  // expected-error at -1 {{no matching function for call to 'f16tof32'}}
+}
+
+float f16tof32_bool(bool p0) {
+  return f16tof32(p0);
+  // expected-error at -1 {{1st argument must be a scalar or vector of unsigned integer types (was 'bool')}}
+}
+
+float f16tof32_bool3(bool3 p0) {
+  return f16tof32(p0);
+  // expected-error at -1 {{1st argument must be a scalar or vector of unsigned integer types (was 'bool3' (aka 'vector<bool, 3>'))}}
+}
+
+
+float f16tof32_int16_t(int16_t p0) {
+  return f16tof32(p0);
+  // expected-error at -1 {{1st argument must be a scalar or vector of unsigned integer types (was 'int16_t' (aka 'short'))}}
+}
+
+float f16tof32_int16_t(unsigned short p0) {
+  return f16tof32(p0);
+  // expected-error at -1 {{incorrect number of bits in integer (expected 32 bits, have 16)}}
+}
+
+float f16tof32_int(int p0) {
+  return f16tof32(p0);
+  // expected-error at -1 {{1st argument must be a scalar or vector of unsigned integer types (was 'int')}}
+}
+
+float f16tof32_int64_t(long p0) {
+  return f16tof32(p0);
+  // expected-error at -1 {{1st argument must be a scalar or vector of unsigned integer types (was 'long')}}
+}
+
+float2 f16tof32_int2_to_float2_promotion(int3 p0) {
+  return f16tof32(p0);
+  // expected-error at -1 {{1st argument must be a scalar or vector of unsigned integer types (was 'int3' (aka 'vector<int, 3>'))}}
+}
+
+float f16tof32_half(half p0) {
+  return f16tof32(p0);
+  // expected-error at -1 {{1st argument must be a scalar or vector of unsigned integer types (was 'half')}}
+}
+
+float f16tof32_half2(half2 p0) {
+  return f16tof32(p0);
+  // expected-error at -1 {{1st argument must be a scalar or vector of unsigned integer types (was 'half2' (aka 'vector<half, 2>'))}}
+}
+
+float f16tof32_float(float p0) {
+  return f16tof32(p0);
+  // expected-error at -1 {{1st argument must be a scalar or vector of unsigned integer types (was 'float')}}
+}
+
+float f16tof32_double(double p0) {
+  return f16tof32(p0);
+  // expected-error at -1 {{1st argument must be a scalar or vector of unsigned integer types (was 'double')}}
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td
index 3b7077c52db21..74a6bc41f8da7 100644
--- a/llvm/include/llvm/IR/IntrinsicsDirectX.td
+++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -140,6 +140,9 @@ def int_dx_isinf : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i1
 def int_dx_isnan : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
     [llvm_anyfloat_ty], [IntrNoMem]>;
 
+def int_dx_legacyf16tof32 : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_float_ty>],
+    [llvm_anyint_ty], [IntrNoMem]>;
+
 def int_dx_lerp : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, LLVMMatchType<0>,LLVMMatchType<0>],
     [IntrNoMem]>;
 
diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
index 49a182be98acd..655b1aa1dde21 100644
--- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td
+++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
@@ -198,4 +198,8 @@ def int_spv_resource_nonuniformindex
   def int_spv_generic_cast_to_ptr_explicit
     : DefaultAttrsIntrinsic<[llvm_anyptr_ty], [generic_ptr_ty],
        [IntrNoMem, NoUndef<RetIndex>]>;
+
+  def int_spv_legacyf16tof32 : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_float_ty>],
+    [llvm_anyint_ty], [IntrNoMem]>;
+
 }
diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index 44c48305f2832..3ae4e09b2f45d 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -1069,6 +1069,15 @@ def WaveActiveOp : DXILOp<119, waveActiveOp> {
   let attributes = [Attributes<DXIL1_0, []>];
 }
 
+def LegacyF16ToF32 : DXILOp<131, legacyF16ToF32> {
+  let Doc = "returns the float16 stored in the low-half of the uint converted "
+            "to a float";
+  let intrinsics = [IntrinSelect<int_dx_legacyf16tof32>];
+  let arguments = [Int32Ty];
+  let result = FloatTy;
+  let stages = [Stages<DXIL1_0, [all_stages]>];
+}
+
 def WaveAllBitCount : DXILOp<135, waveAllOp> {
   let Doc = "returns the count of bits set to 1 across the wave";
   let intrinsics = [IntrinSelect<int_dx_wave_active_countbits>];
diff --git a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp
index 68fd3e0bc74c7..614a4bab36e67 100644
--- a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp
+++ b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp
@@ -29,11 +29,12 @@ bool DirectXTTIImpl::isTargetIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID,
                                                             int OpdIdx) const {
   switch (ID) {
   case Intrinsic::dx_asdouble:
-  case Intrinsic::dx_isinf:
-  case Intrinsic::dx_isnan:
   case Intrinsic::dx_firstbitlow:
-  case Intrinsic::dx_firstbituhigh:
   case Intrinsic::dx_firstbitshigh:
+  case Intrinsic::dx_firstbituhigh:
+  case Intrinsic::dx_isinf:
+  case Intrinsic::dx_isnan:
+  case Intrinsic::dx_legacyf16tof32:
     return OpdIdx == 0;
   default:
     return OpdIdx == -1;
@@ -50,6 +51,7 @@ bool DirectXTTIImpl::isTargetIntrinsicTriviallyScalarizable(
   case Intrinsic::dx_frac:
   case Intrinsic::dx_isinf:
   case Intrinsic::dx_isnan:
+  case Intrinsic::dx_legacyf16tof32:
   case Intrinsic::dx_rsqrt:
   case Intrinsic::dx_saturate:
   case Intrinsic::dx_splitdouble:
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index 021353ab716f7..12ae8ff2d0478 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -212,6 +212,9 @@ class SPIRVInstructionSelector : public InstructionSelector {
   bool selectOpIsNan(Register ResVReg, const SPIRVType *ResType,
                      MachineInstr &I) const;
 
+  bool selectF16ToF32(Register ResVReg, const SPIRVType *ResType,
+                      MachineInstr &I) const;
+
   template <bool Signed>
   bool selectDot4AddPacked(Register ResVReg, const SPIRVType *ResType,
                            MachineInstr &I) const;
@@ -3472,6 +3475,10 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg,
   case Intrinsic::spv_resource_nonuniformindex: {
     return selectResourceNonUniformIndex(ResVReg, ResType, I);
   }
+  case Intrinsic::spv_legacyf16tof32: {
+    return selectF16ToF32(ResVReg, ResType, I);
+  }
+
   default: {
     std::string DiagMsg;
     raw_string_ostream OS(DiagMsg);
@@ -3744,6 +3751,89 @@ bool SPIRVInstructionSelector::selectResourceNonUniformIndex(
   return true;
 }
 
+bool SPIRVInstructionSelector::selectF16ToF32(Register ResVReg,
+                                              const SPIRVType *ResType,
+                                              MachineInstr &I) const {
+  assert(I.getNumOperands() == 3);
+  assert(I.getOperand(0).isReg());
+  assert(I.getOperand(2).isReg());
+  Register SrcReg = I.getOperand(2).getReg();
+  const SPIRVType *SrcRegType = GR.getSPIRVTypeForVReg(SrcReg);
+  LLT SrcType = MRI->getType(SrcReg);
+  SPIRVType *SrcEltType = GR.getScalarOrVectorComponentType(SrcRegType);
+  SPIRVType *ResEltType = GR.getScalarOrVectorComponentType(ResType);
+  const TargetRegisterClass *SrcRegClass = GR.getRegClass(SrcEltType);
+  const TargetRegisterClass *ResRegClass = GR.getRegClass(ResEltType);
+  MachineIRBuilder MIRBuilder(I);
+  const SPIRVType *Vec2ResType =
+      GR.getOrCreateSPIRVVectorType(ResEltType, 2, MIRBuilder, false);
+  const TargetRegisterClass *Vec2RegClass = GR.getRegClass(Vec2ResType);
+
+  bool Result = true;
+  MachineBasicBlock &BB = *I.getParent();
+  if (SrcType.isVector()) {
+    // Vector of uints: extract, unpack and take component 0 for each element
+    uint64_t ResultSize = GR.getScalarOrVectorComponentCount(ResType);
+    SmallVector<Register> ComponentRegisters;
+    for (uint64_t Idx = 0; Idx < ResultSize; Idx++) {
+      Register EltReg = MRI->createVirtualRegister(SrcRegClass);
+      Register FReg = MRI->createVirtualRegister(ResRegClass);
+      Register Vec2Reg = MRI->createVirtualRegister(Vec2RegClass);
+
+      Result &=
+          BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpCompositeExtract))
+              .addDef(EltReg)
+              .addUse(GR.getSPIRVTypeID(SrcEltType))
+              .addUse(SrcReg)
+              .addImm(Idx)
+              .constrainAllUses(TII, TRI, RBI);
+
+      Result &=
+          BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpExtInst))
+              .addDef(Vec2Reg)
+              .addUse(GR.getSPIRVTypeID(Vec2ResType))
+              .addImm(
+                  static_cast<uint32_t>(SPIRV::InstructionSet::GLSL_std_450))
+              .addImm(GL::UnpackHalf2x16)
+              .addUse(EltReg)
+              .constrainAllUses(TII, TRI, RBI);
+
+      Result &=
+          BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpCompositeExtract))
+              .addDef(FReg)
+              .addUse(GR.getSPIRVTypeID(ResEltType))
+              .addUse(Vec2Reg)
+              .addImm(0)
+              .constrainAllUses(TII, TRI, RBI);
+
+      ComponentRegisters.emplace_back(FReg);
+    }
+
+    MachineInstrBuilder MIB =
+        BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpCompositeConstruct))
+            .addDef(ResVReg)
+            .addUse(GR.getSPIRVTypeID(ResType));
+
+    for (Register ComponentReg : ComponentRegisters)
+      MIB.addUse(ComponentReg);
+    return Result && MIB.constrainAllUses(TII, TRI, RBI);
+
+  } else if (SrcType.isScalar()) {
+    // Scalar uint: unpack to a 2-element float vector and keep component 0
+    Register Vec2Reg = MRI->createVirtualRegister(Vec2RegClass);
+    Result &= selectExtInst(Vec2Reg, Vec2ResType, I, GL::UnpackHalf2x16);
+    Result &=
+        BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpCompositeExtract))
+            .addDef(ResVReg)
+            .addUse(GR.getSPIRVTypeID(ResType))
+            .addUse(Vec2Reg)
+            .addImm(0)
+            .constrainAllUses(TII, TRI, RBI);
+    return Result;
+  }
+  return false;
+}
+
 void SPIRVInstructionSelector::decorateUsesAsNonUniform(
     Register &NonUniformReg) const {
   llvm::SmallVector<Register> WorkList = {NonUniformReg};
diff --git a/llvm/test/CodeGen/DirectX/f16tof32.ll b/llvm/test/CodeGen/DirectX/f16tof32.ll
new file mode 100644
index 0000000000000..edc5c1942e8bd
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/f16tof32.ll
@@ -0,0 +1,57 @@
+; RUN: opt -S -dxil-intrinsic-expansion -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.9-library %s | FileCheck %s
+
+define hidden noundef nofpclass(nan inf) float @_Z11test_scalarj(i32 noundef %p0) local_unnamed_addr #0 {
+entry:
+  ; CHECK: [[UINT:%.*]] = call float @dx.op.legacyF16ToF32(i32 131, i32 %p0)
+  ; CHECK: ret float [[UINT]]
+  %hlsl.f16tof32 = tail call reassoc nnan ninf nsz arcp afn float @llvm.dx.legacyf16tof32.i32(i32 %p0)
+  ret float %hlsl.f16tof32
+}
+
+define hidden noundef nofpclass(nan inf) <2 x float> @_Z10test_uint2Dv2_j(<2 x i32> noundef %p0) local_unnamed_addr #0 {
+entry:
+  ; CHECK: [[UINT2_0:%.*]] = extractelement <2 x i32> %p0, i64 0
+  ; CHECK: [[FLOAT_0:%.*]] = call float @dx.op.legacyF16ToF32(i32 131, i32 [[UINT2_0]])
+  ; CHECK: [[UINT2_1:%.*]] = extractelement <2 x i32> %p0, i64 1
+  ; CHECK: [[FLOAT_1:%.*]] = call float @dx.op.legacyF16ToF32(i32 131, i32 [[UINT2_1]])
+  ; CHECK: [[FLOAT2_0:%.*]] = insertelement <2 x float> poison, float [[FLOAT_0]], i64 0
+  ; CHECK: [[FLOAT2_1:%.*]] = insertelement <2 x float> [[FLOAT2_0]], float [[FLOAT_1]], i64 1
+  ; CHECK: ret <2 x float>  [[FLOAT2_1]]
+  %hlsl.f16tof32 = tail call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.dx.legacyf16tof32.v2i32(<2 x i32> %p0)
+  ret <2 x float> %hlsl.f16tof32
+}
+
+define hidden noundef nofpclass(nan inf) <3 x float> @_Z10test_uint3Dv3_j(<3 x i32> noundef %p0) local_unnamed_addr #0 {
+entry:
+  ; CHECK: [[UINT3_0:%.*]] = extractelement <3 x i32> %p0, i64 0
+  ; CHECK: [[FLOAT_0:%.*]] = call float @dx.op.legacyF16ToF32(i32 131, i32 [[UINT3_0]])
+  ; CHECK: [[UINT3_1:%.*]] = extractelement <3 x i32> %p0, i64 1
+  ; CHECK: [[FLOAT_1:%.*]] = call float @dx.op.legacyF16ToF32(i32 131, i32 [[UINT3_1]])
+  ; CHECK: [[UINT3_2:%.*]] = extractelement <3 x i32> %p0, i64 2
+  ; CHECK: [[FLOAT_2:%.*]] = call float @dx.op.legacyF16ToF32(i32 131, i32 [[UINT3_2]])
+  ; CHECK: [[FLOAT3_0:%.*]] = insertelement <3 x float> poison, float [[FLOAT_0]], i64 0
+  ; CHECK: [[FLOAT3_1:%.*]] = insertelement <3 x float> [[FLOAT3_0]], float [[FLOAT_1]], i64 1
+  ; CHECK: [[FLOAT3_2:%.*]] = insertelement <3 x float> [[FLOAT3_1]], float [[FLOAT_2]], i64 2
+  ; CHECK: ret <3 x float> [[FLOAT3_2]]
+  %hlsl.f16tof32 = tail call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.dx.legacyf16tof32.v3i32(<3 x i32> %p0)
+  ret <3 x float> %hlsl.f16tof32
+}
+
+define hidden noundef nofpclass(nan inf) <4 x float> @_Z10test_uint4Dv4_j(<4 x i32> noundef %p0) local_unnamed_addr #0 {
+entry:
+  ; CHECK: [[UINT4_0:%.*]] = extractelement <4 x i32> %p0, i64 0
+  ; CHECK: [[FLOAT_0:%.*]] = call float @dx.op.legacyF16ToF32(i32 131, i32 [[UINT4_0]])
+  ; CHECK: [[UINT4_1:%.*]] = extractelement <4 x i32> %p0, i64 1
+  ; CHECK: [[FLOAT_1:%.*]] = call float @dx.op.legacyF16ToF32(i32 131, i32 [[UINT4_1]])
+  ; CHECK: [[UINT4_2:%.*]] = extractelement <4 x i32> %p0, i64 2
+  ; CHECK: [[FLOAT_2:%.*]] = call float @dx.op.legacyF16ToF32(i32 131, i32 [[UINT4_2]])
+  ; CHECK: [[UINT4_3:%.*]] = extractelement <4 x i32> %p0, i64 3
+  ; CHECK: [[FLOAT_3:%.*]] = call float @dx.op.legacyF16ToF32(i32 131, i32 [[UINT4_3]])
+  ; CHECK: [[FLOAT4_0:%.*]] = insertelement <4 x float> poison, float [[FLOAT_0]], i64 0
+  ; CHECK: [[FLOAT4_1:%.*]] = insertelement <4 x float> [[FLOAT4_0]], float [[FLOAT_1]], i64 1
+  ; CHECK: [[FLOAT4_2:%.*]] = insertelement <4 x float> [[FLOAT4_1]], float [[FLOAT_2]], i64 2
+  ; CHECK: [[FLOAT4_3:%.*]] = insertelement <4 x float> [[FLOAT4_2]], float [[FLOAT_3]], i64 3
+  ; CHECK: ret <4 x float> [[FLOAT4_3]]
+  %hlsl.f16tof32 = tail call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.dx.legacyf16tof32.v4i32(<4 x i32> %p0)
+  ret <4 x float> %hlsl.f16tof32
+}
diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/f16tof32.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/f16tof32.ll
new file mode 100644
index 0000000000000..883fcbd02e769
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/f16tof32.ll
@@ -0,0 +1,76 @@
+; RUN: llc -O0 -verify-machineinstrs -mtriple=spirv-unknown-vulkan %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan %s -o - -filetype=obj | spirv-val %}
+
+; CHECK-DAG: [[SET:%.*]] = OpExtInstImport "GLSL.std.450"
+; CHECK-DAG: [[UINT:%.*]] = OpTypeInt 32 0
+; CHECK-DAG: [[UINT2:%.*]] = OpTypeVector [[UINT]] 2
+; CHECK-DAG: [[UINT3:%.*]] = OpTypeVector [[UINT]] 3
+; CHECK-DAG: [[UINT4:%.*]] = OpTypeVector [[UINT]] 4
+; CHECK-DAG: [[FLOAT:%.*]] = OpTypeFloat 32
+; CHECK-DAG: [[FLOAT2:%.*]] = OpTypeVector [[FLOAT]] 2
+; CHECK-DAG: [[FLOAT3:%.*]] = OpTypeVector [[FLOAT]] 3
+; CHECK-DAG: [[FLOAT4:%.*]] = OpTypeVector [[FLOAT]] 4
+
+; CHECK: [[ARG:%.*]] = OpFunctionParameter [[UINT]]
+; CHECK: [[HALF_PAIR:%.*]] = OpExtInst [[FLOAT2]] [[SET]] UnpackHalf2x16 [[ARG]]
+; CHECK: [[ELEM_0:%.*]] = OpCompositeExtract [[FLOAT]] [[HALF_PAIR]] 0
+; CHECK: OpReturnValue [[ELEM_0]]
+define hidden noundef nofpclass(nan inf) float @_Z11test_scalarj(i32 noundef %p0) local_unnamed_addr #0 {
+entry: ; Scalar case: a single UnpackHalf2x16 whose component 0 is the returned value.
+  %hlsl.f16tof32 = tail call reassoc nnan ninf nsz arcp afn float @llvm.spv.legacyf16tof32.i32(i32 %p0)
+  ret float %hlsl.f16tof32
+}
+
+; CHECK: [[ARG:%.*]] = OpFunctionParameter [[UINT2]]
+; CHECK-DAG: [[LANE_0:%.*]] = OpCompositeExtract [[UINT]] [[ARG]] 0
+; CHECK-DAG: [[LANE_1:%.*]] = OpCompositeExtract [[UINT]] [[ARG]] 1
+; CHECK-DAG: [[PAIR_0:%.*]] = OpExtInst [[FLOAT2]] [[SET]] UnpackHalf2x16 [[LANE_0]]
+; CHECK-DAG: [[PAIR_1:%.*]] = OpExtInst [[FLOAT2]] [[SET]] UnpackHalf2x16 [[LANE_1]]
+; CHECK-DAG: [[ELEM_0:%.*]] = OpCompositeExtract [[FLOAT]] [[PAIR_0]] 0
+; CHECK-DAG: [[ELEM_1:%.*]] = OpCompositeExtract [[FLOAT]] [[PAIR_1]] 0
+; CHECK: [[VEC:%.*]] = OpCompositeConstruct [[FLOAT2]] [[ELEM_0]] [[ELEM_1]]
+; CHECK: OpReturnValue [[VEC]]
+define hidden noundef nofpclass(nan inf) <2 x float> @_Z10test_uint2Dv2_j(<2 x i32> noundef %p0) local_unnamed_addr #0 {
+entry: ; Two-lane case: each lane gets its own UnpackHalf2x16; component 0 of each result forms the output vector.
+  %hlsl.f16tof32 = tail call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.spv.legacyf16tof32.v2i32(<2 x i32> %p0)
+  ret <2 x float> %hlsl.f16tof32
+}
+
+; CHECK: [[ARG:%.*]] = OpFunctionParameter [[UINT3]]
+; CHECK-DAG: [[LANE_0:%.*]] = OpCompositeExtract [[UINT]] [[ARG]] 0
+; CHECK-DAG: [[LANE_1:%.*]] = OpCompositeExtract [[UINT]] [[ARG]] 1
+; CHECK-DAG: [[LANE_2:%.*]] = OpCompositeExtract [[UINT]] [[ARG]] 2
+; CHECK-DAG: [[PAIR_0:%.*]] = OpExtInst [[FLOAT2]] [[SET]] UnpackHalf2x16 [[LANE_0]]
+; CHECK-DAG: [[PAIR_1:%.*]] = OpExtInst [[FLOAT2]] [[SET]] UnpackHalf2x16 [[LANE_1]]
+; CHECK-DAG: [[PAIR_2:%.*]] = OpExtInst [[FLOAT2]] [[SET]] UnpackHalf2x16 [[LANE_2]]
+; CHECK-DAG: [[ELEM_0:%.*]] = OpCompositeExtract [[FLOAT]] [[PAIR_0]] 0
+; CHECK-DAG: [[ELEM_1:%.*]] = OpCompositeExtract [[FLOAT]] [[PAIR_1]] 0
+; CHECK-DAG: [[ELEM_2:%.*]] = OpCompositeExtract [[FLOAT]] [[PAIR_2]] 0
+; CHECK: [[VEC:%.*]] = OpCompositeConstruct [[FLOAT3]] [[ELEM_0]] [[ELEM_1]] [[ELEM_2]]
+; CHECK: OpReturnValue [[VEC]]
+define hidden noundef nofpclass(nan inf) <3 x float> @_Z10test_uint3Dv3_j(<3 x i32> noundef %p0) local_unnamed_addr #0 {
+entry: ; Three-lane case: each lane gets its own UnpackHalf2x16; component 0 of each result forms the output vector.
+  %hlsl.f16tof32 = tail call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.spv.legacyf16tof32.v3i32(<3 x i32> %p0)
+  ret <3 x float> %hlsl.f16tof32
+}
+
+; CHECK: [[ARG:%.*]] = OpFunctionParameter [[UINT4]]
+; CHECK-DAG: [[LANE_0:%.*]] = OpCompositeExtract [[UINT]] [[ARG]] 0
+; CHECK-DAG: [[LANE_1:%.*]] = OpCompositeExtract [[UINT]] [[ARG]] 1
+; CHECK-DAG: [[LANE_2:%.*]] = OpCompositeExtract [[UINT]] [[ARG]] 2
+; CHECK-DAG: [[LANE_3:%.*]] = OpCompositeExtract [[UINT]] [[ARG]] 3
+; CHECK-DAG: [[PAIR_0:%.*]] = OpExtInst [[FLOAT2]] [[SET]] UnpackHalf2x16 [[LANE_0]]
+; CHECK-DAG: [[PAIR_1:%.*]] = OpExtInst [[FLOAT2]] [[SET]] UnpackHalf2x16 [[LANE_1]]
+; CHECK-DAG: [[PAIR_2:%.*]] = OpExtInst [[FLOAT2]] [[SET]] UnpackHalf2x16 [[LANE_2]]
+; CHECK-DAG: [[PAIR_3:%.*]] = OpExtInst [[FLOAT2]] [[SET]] UnpackHalf2x16 [[LANE_3]]
+; CHECK-DAG: [[ELEM_0:%.*]] = OpCompositeExtract [[FLOAT]] [[PAIR_0]] 0
+; CHECK-DAG: [[ELEM_1:%.*]] = OpCompositeExtract [[FLOAT]] [[PAIR_1]] 0
+; CHECK-DAG: [[ELEM_2:%.*]] = OpCompositeExtract [[FLOAT]] [[PAIR_2]] 0
+; CHECK-DAG: [[ELEM_3:%.*]] = OpCompositeExtract [[FLOAT]] [[PAIR_3]] 0
+; CHECK: [[VEC:%.*]] = OpCompositeConstruct [[FLOAT4]] [[ELEM_0]] [[ELEM_1]] [[ELEM_2]] [[ELEM_3]]
+; CHECK: OpReturnValue [[VEC]]
+define hidden noundef nofpclass(nan inf) <4 x float> @_Z10test_uint4Dv4_j(<4 x i32> noundef %p0) local_unnamed_addr #0 {
+entry: ; Four-lane case: each lane gets its own UnpackHalf2x16; component 0 of each result forms the output vector.
+  %hlsl.f16tof32 = tail call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.spv.legacyf16tof32.v4i32(<4 x i32> %p0)
+  ret <4 x float> %hlsl.f16tof32
+}