[llvm] 89ec96b - [HLSL] Implement the f16tof32() intrinsic (#165860)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 4 09:04:46 PST 2025
Author: Tim Corringham
Date: 2025-11-04T17:04:39Z
New Revision: 89ec96b8b4f4a3115689b045cd64afae1c28044e
URL: https://github.com/llvm/llvm-project/commit/89ec96b8b4f4a3115689b045cd64afae1c28044e
DIFF: https://github.com/llvm/llvm-project/commit/89ec96b8b4f4a3115689b045cd64afae1c28044e.diff
LOG: [HLSL] Implement the f16tof32() intrinsic (#165860)
Implement the f16tof32() intrinsic, including DXIL and SPIRV codegen, and
associated tests.
Fixes #99112
---------
Co-authored-by: Tim Corringham <tcorring at amd.com>
Added:
clang/test/CodeGenHLSL/builtins/f16tof32-builtin.hlsl
clang/test/CodeGenHLSL/builtins/f16tof32.hlsl
clang/test/SemaHLSL/BuiltIns/f16tof32-errors.hlsl
llvm/test/CodeGen/DirectX/f16tof32.ll
llvm/test/CodeGen/SPIRV/unpackfloat2x16.ll
Modified:
clang/include/clang/Basic/Builtins.td
clang/lib/CodeGen/CGHLSLBuiltins.cpp
clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h
clang/lib/Sema/SemaHLSL.cpp
llvm/include/llvm/IR/IntrinsicsDirectX.td
llvm/include/llvm/IR/IntrinsicsSPIRV.td
llvm/lib/Target/DirectX/DXIL.td
llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp
llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
Removed:
################################################################################
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index 2b400b012d6ed..0275447e1090a 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -5235,6 +5235,12 @@ def HLSLGetSpirvSpecConstant : LangBuiltin<"HLSL_LANG">, HLSLScalarTemplate {
let Prototype = "T(unsigned int, T)";
}
+def HLSLF16ToF32 : LangBuiltin<"HLSL_LANG"> {
+ let Spellings = ["__builtin_hlsl_elementwise_f16tof32"];
+ let Attributes = [NoThrow, Const, CustomTypeChecking];
+ let Prototype = "void(...)";
+}
+
// Builtins for XRay.
def XRayCustomEvent : Builtin {
let Spellings = ["__xray_customevent"];
diff --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
index fbf4a5722caed..b6928ce7d9c44 100644
--- a/clang/lib/CodeGen/CGHLSLBuiltins.cpp
+++ b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
@@ -160,6 +160,57 @@ static Value *handleHlslSplitdouble(const CallExpr *E, CodeGenFunction *CGF) {
return LastInst;
}
+static Value *handleElementwiseF16ToF32(CodeGenFunction &CGF,
+ const CallExpr *E) {
+ Value *Op0 = CGF.EmitScalarExpr(E->getArg(0));
+ QualType Op0Ty = E->getArg(0)->getType();
+ llvm::Type *ResType = CGF.FloatTy;
+ uint64_t NumElements = 0;
+ if (Op0->getType()->isVectorTy()) {
+ NumElements =
+ E->getArg(0)->getType()->castAs<clang::VectorType>()->getNumElements();
+ ResType =
+ llvm::VectorType::get(ResType, ElementCount::getFixed(NumElements));
+ }
+ if (!Op0Ty->hasUnsignedIntegerRepresentation())
+ llvm_unreachable(
+ "f16tof32 operand must have an unsigned int representation");
+
+ if (CGF.CGM.getTriple().isDXIL())
+ return CGF.Builder.CreateIntrinsic(ResType, Intrinsic::dx_legacyf16tof32,
+ ArrayRef<Value *>{Op0}, nullptr,
+ "hlsl.f16tof32");
+
+ if (CGF.CGM.getTriple().isSPIRV()) {
+ // We use the SPIRV UnpackHalf2x16 operation to avoid the need for the
+ // Int16 and Float16 capabilities
+ auto UnpackType =
+ llvm::VectorType::get(CGF.FloatTy, ElementCount::getFixed(2));
+ if (NumElements == 0) {
+ // a scalar input - simply extract the first element of the unpacked
+ // vector
+ Value *Unpack = CGF.Builder.CreateIntrinsic(
+ UnpackType, Intrinsic::spv_unpackhalf2x16, ArrayRef<Value *>{Op0});
+ return CGF.Builder.CreateExtractElement(Unpack, (uint64_t)0);
+ } else {
+ // a vector input - build a congruent output vector by iterating through
+ // the input vector calling unpackhalf2x16 for each element
+ Value *Result = PoisonValue::get(ResType);
+ for (uint64_t i = 0; i < NumElements; i++) {
+ Value *InVal = CGF.Builder.CreateExtractElement(Op0, i);
+ Value *Unpack = CGF.Builder.CreateIntrinsic(
+ UnpackType, Intrinsic::spv_unpackhalf2x16,
+ ArrayRef<Value *>{InVal});
+ Value *Res = CGF.Builder.CreateExtractElement(Unpack, (uint64_t)0);
+ Result = CGF.Builder.CreateInsertElement(Result, Res, i);
+ }
+ return Result;
+ }
+ }
+
+ llvm_unreachable("Intrinsic F16ToF32 not supported by target architecture");
+}
+
static Value *emitBufferStride(CodeGenFunction *CGF, const Expr *HandleExpr,
LValue &Stride) {
// Figure out the stride of the buffer elements from the handle type.
@@ -579,6 +630,9 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
/*ReturnType=*/X->getType(), CGM.getHLSLRuntime().getDegreesIntrinsic(),
ArrayRef<Value *>{X}, nullptr, "hlsl.degrees");
}
+ case Builtin::BI__builtin_hlsl_elementwise_f16tof32: {
+ return handleElementwiseF16ToF32(*this, E);
+ }
case Builtin::BI__builtin_hlsl_elementwise_frac: {
Value *Op0 = EmitScalarExpr(E->getArg(0));
if (!E->getArg(0)->getType()->hasFloatingRepresentation())
diff --git a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h
index a918af39e4074..4c5861c2c5f9d 100644
--- a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h
+++ b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h
@@ -1052,6 +1052,27 @@ float3 exp2(float3);
_HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp2)
float4 exp2(float4);
+//===----------------------------------------------------------------------===//
+// f16tof32 builtins
+//===----------------------------------------------------------------------===//
+
+/// \fn float f16tof32(uint x)
+/// \brief Returns the half value stored in the low 16 bits of the uint arg
+/// converted to a float.
+/// \param x The uint containing two half values.
+///
+/// The float value of the half value found in the low 16 bits of the \a x
+/// parameter.
+
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_f16tof32)
+float f16tof32(uint);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_f16tof32)
+float2 f16tof32(uint2);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_f16tof32)
+float3 f16tof32(uint3);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_f16tof32)
+float4 f16tof32(uint4);
+
//===----------------------------------------------------------------------===//
// firstbithigh builtins
//===----------------------------------------------------------------------===//
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index 94a490a8f68dc..b9707f0036765 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -2802,6 +2802,23 @@ static bool CheckUnsignedIntRepresentation(Sema *S, SourceLocation Loc,
return false;
}
+static bool CheckExpectedBitWidth(Sema *S, CallExpr *TheCall,
+                                  unsigned ArgOrdinal, unsigned Width) {
+  // Diagnose unless 0-based arg ArgOrdinal (or its element type) is Width bits.
+  const Expr *Arg = TheCall->getArg(ArgOrdinal);
+  QualType ArgTy = Arg->getType();
+  if (auto *VTy = ArgTy->getAs<VectorType>())
+    ArgTy = VTy->getElementType();
+  uint64_t ElementBitCount =
+      S->getASTContext().getTypeSizeInChars(ArgTy).getQuantity() * 8;
+  if (ElementBitCount != Width) {
+    S->Diag(Arg->getBeginLoc(), diag::err_integer_incorrect_bit_count)
+        << Width << ElementBitCount;
+    return true;
+  }
+  return false;
+}
+
static void SetElementTypeAsReturnType(Sema *S, CallExpr *TheCall,
QualType ReturnType) {
auto *VecTyA = TheCall->getArg(0)->getType()->getAs<VectorType>();
@@ -2961,24 +2978,16 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
CheckUnsignedIntVecRepresentation))
return true;
- auto *VTy = TheCall->getArg(0)->getType()->getAs<VectorType>();
// ensure arg integers are 32-bits
- uint64_t ElementBitCount = getASTContext()
- .getTypeSizeInChars(VTy->getElementType())
- .getQuantity() *
- 8;
- if (ElementBitCount != 32) {
- SemaRef.Diag(TheCall->getBeginLoc(),
- diag::err_integer_incorrect_bit_count)
- << 32 << ElementBitCount;
+ if (CheckExpectedBitWidth(&SemaRef, TheCall, 0, 32))
return true;
- }
// ensure both args are vectors of total bit size of a multiple of 64
+ auto *VTy = TheCall->getArg(0)->getType()->getAs<VectorType>();
int NumElementsArg = VTy->getNumElements();
if (NumElementsArg != 2 && NumElementsArg != 4) {
SemaRef.Diag(TheCall->getBeginLoc(), diag::err_vector_incorrect_bit_count)
- << 1 /*a multiple of*/ << 64 << NumElementsArg * ElementBitCount;
+ << 1 /*a multiple of*/ << 64 << NumElementsArg * 32;
return true;
}
@@ -3295,7 +3304,7 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
break;
}
// Note these are llvm builtins that we want to catch invalid intrinsic
- // generation. Normal handling of these builitns will occur elsewhere.
+ // generation. Normal handling of these builtins will occur elsewhere.
case Builtin::BI__builtin_elementwise_bitreverse: {
// does not include a check for number of arguments
// because that is done previously
@@ -3405,6 +3414,30 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
}
break;
}
+ case Builtin::BI__builtin_hlsl_elementwise_f16tof32: {
+ if (SemaRef.checkArgCount(TheCall, 1))
+ return true;
+ if (CheckAllArgTypesAreCorrect(&SemaRef, TheCall,
+ CheckUnsignedIntRepresentation))
+ return true;
+ // ensure arg integers are 32 bits
+ if (CheckExpectedBitWidth(&SemaRef, TheCall, 0, 32))
+ return true;
+ // check it wasn't a bool type
+ QualType ArgTy = TheCall->getArg(0)->getType();
+ if (auto *VTy = ArgTy->getAs<VectorType>())
+ ArgTy = VTy->getElementType();
+ if (ArgTy->isBooleanType()) {
+ SemaRef.Diag(TheCall->getArg(0)->getBeginLoc(),
+ diag::err_builtin_invalid_arg_type)
+ << 1 << /* scalar or vector of */ 5 << /* unsigned int */ 3
+ << /* no fp */ 0 << TheCall->getArg(0)->getType();
+ return true;
+ }
+
+ SetElementTypeAsReturnType(&SemaRef, TheCall, getASTContext().FloatTy);
+ break;
+ }
}
return false;
}
diff --git a/clang/test/CodeGenHLSL/builtins/f16tof32-builtin.hlsl b/clang/test/CodeGenHLSL/builtins/f16tof32-builtin.hlsl
new file mode 100644
index 0000000000000..65dba664bb5ea
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/f16tof32-builtin.hlsl
@@ -0,0 +1,30 @@
+// RUN: %clang_cc1 -std=hlsl202x -finclude-default-header -x hlsl -triple \
+// RUN: dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
+// RUN: -o - | FileCheck %s
+
+// CHECK: define hidden noundef nofpclass(nan inf) float
+// CHECK: %hlsl.f16tof32 = call reassoc nnan ninf nsz arcp afn float @llvm.dx.legacyf16tof32.i32(i32 %0)
+// CHECK: ret float %hlsl.f16tof32
+// CHECK: declare float @llvm.dx.legacyf16tof32.i32(i32)
+float test_scalar(uint p0) { return __builtin_hlsl_elementwise_f16tof32(p0); }
+
+// CHECK: define hidden noundef nofpclass(nan inf) <2 x float>
+// CHECK: %hlsl.f16tof32 = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.dx.legacyf16tof32.v2i32(<2 x i32> %0)
+// CHECK: ret <2 x float> %hlsl.f16tof32
+// CHECK: declare <2 x float> @llvm.dx.legacyf16tof32.v2i32(<2 x i32>)
+float2 test_uint2(uint2 p0) { return __builtin_hlsl_elementwise_f16tof32(p0); }
+
+// CHECK: define hidden noundef nofpclass(nan inf) <3 x float> @_Z10test_uint3Dv3_j(<3 x i32> noundef %p0) #0 {
+// CHECK: %hlsl.f16tof32 = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.dx.legacyf16tof32.v3i32(<3 x i32> %0)
+// CHECK: ret <3 x float> %hlsl.f16tof32
+// CHECK: declare <3 x float> @llvm.dx.legacyf16tof32.v3i32(<3 x i32>)
+float3 test_uint3(uint3 p0) { return __builtin_hlsl_elementwise_f16tof32(p0); }
+
+// CHECK: define hidden noundef nofpclass(nan inf) <4 x float> @_Z10test_uint4Dv4_j(<4 x i32> noundef %p0) #0 {
+// CHECK: %hlsl.f16tof32 = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.dx.legacyf16tof32.v4i32(<4 x i32> %0)
+// CHECK: ret <4 x float> %hlsl.f16tof32
+// CHECK: declare <4 x float> @llvm.dx.legacyf16tof32.v4i32(<4 x i32>)
+float4 test_uint4(uint4 p0) { return __builtin_hlsl_elementwise_f16tof32(p0); }
+
+
+
diff --git a/clang/test/CodeGenHLSL/builtins/f16tof32.hlsl b/clang/test/CodeGenHLSL/builtins/f16tof32.hlsl
new file mode 100644
index 0000000000000..b68bc197f16c5
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/f16tof32.hlsl
@@ -0,0 +1,30 @@
+// RUN: %clang_cc1 -std=hlsl202x -finclude-default-header -x hlsl -triple \
+// RUN: dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
+// RUN: -o - | FileCheck %s
+
+// CHECK: define hidden noundef nofpclass(nan inf) float
+// CHECK: %hlsl.f16tof32 = call reassoc nnan ninf nsz arcp afn float @llvm.dx.legacyf16tof32.i32(i32 %0)
+// CHECK: ret float %hlsl.f16tof32
+// CHECK: declare float @llvm.dx.legacyf16tof32.i32(i32)
+float test_scalar(uint p0) { return f16tof32(p0); }
+
+// CHECK: define hidden noundef nofpclass(nan inf) <2 x float>
+// CHECK: %hlsl.f16tof32 = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.dx.legacyf16tof32.v2i32(<2 x i32> %0)
+// CHECK: ret <2 x float> %hlsl.f16tof32
+// CHECK: declare <2 x float> @llvm.dx.legacyf16tof32.v2i32(<2 x i32>)
+float2 test_uint2(uint2 p0) { return f16tof32(p0); }
+
+// CHECK: define hidden noundef nofpclass(nan inf) <3 x float> @_Z10test_uint3Dv3_j(<3 x i32> noundef %p0) #0 {
+// CHECK: %hlsl.f16tof32 = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.dx.legacyf16tof32.v3i32(<3 x i32> %0)
+// CHECK: ret <3 x float> %hlsl.f16tof32
+// CHECK: declare <3 x float> @llvm.dx.legacyf16tof32.v3i32(<3 x i32>)
+float3 test_uint3(uint3 p0) { return f16tof32(p0); }
+
+// CHECK: define hidden noundef nofpclass(nan inf) <4 x float> @_Z10test_uint4Dv4_j(<4 x i32> noundef %p0) #0 {
+// CHECK: %hlsl.f16tof32 = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.dx.legacyf16tof32.v4i32(<4 x i32> %0)
+// CHECK: ret <4 x float> %hlsl.f16tof32
+// CHECK: declare <4 x float> @llvm.dx.legacyf16tof32.v4i32(<4 x i32>)
+float4 test_uint4(uint4 p0) { return f16tof32(p0); }
+
+
+
diff --git a/clang/test/SemaHLSL/BuiltIns/f16tof32-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/f16tof32-errors.hlsl
new file mode 100644
index 0000000000000..8f2f9308ed966
--- /dev/null
+++ b/clang/test/SemaHLSL/BuiltIns/f16tof32-errors.hlsl
@@ -0,0 +1,134 @@
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple dxil-pc-shadermodel6.6-library %s -fnative-int16-type -emit-llvm-only -disable-llvm-passes -verify
+
+float builtin_f16tof32_too_few_arg() {
+ return __builtin_hlsl_elementwise_f16tof32();
+ // expected-error at -1 {{too few arguments to function call, expected 1, have 0}}
+ // expected-note at hlsl/hlsl_alias_intrinsics.h:* 4 {{candidate function not viable: requires 1 argument, but 0 were provided}}
+}
+
+float builtin_f16tof32_too_many_arg(uint p0) {
+ return __builtin_hlsl_elementwise_f16tof32(p0, p0);
+ // expected-error at -1 {{too many arguments to function call, expected 1, have 2}}
+ // expected-note at hlsl/hlsl_alias_intrinsics.h:* 4 {{candidate function not viable: requires 1 argument, but 2 were provided}}
+}
+
+float builtin_f16tof32_bool(bool p0) {
+ return __builtin_hlsl_elementwise_f16tof32(p0);
+ // expected-error at -1 {{1st argument must be a scalar or vector of unsigned integer types (was 'bool')}}
+}
+
+float builtin_f16tof32_bool4(bool4 p0) {
+ return __builtin_hlsl_elementwise_f16tof32(p0);
+ // expected-error at -1 {{1st argument must be a scalar or vector of unsigned integer types (was 'bool4' (aka 'vector<bool, 4>'))}}
+}
+
+float builtin_f16tof32_short(short p0) {
+ return __builtin_hlsl_elementwise_f16tof32(p0);
+ // expected-error at -1 {{1st argument must be a scalar or vector of unsigned integer types (was 'short')}}
+}
+
+float builtin_f16tof32_unsigned_short(unsigned short p0) {
+ return __builtin_hlsl_elementwise_f16tof32(p0);
+ // expected-error at -1 {{incorrect number of bits in integer (expected 32 bits, have 16)}}
+}
+
+float builtin_f16tof32_int(int p0) {
+ return __builtin_hlsl_elementwise_f16tof32(p0);
+ // expected-error at -1 {{1st argument must be a scalar or vector of unsigned integer types (was 'int')}}
+}
+
+float builtin_f16tof32_int64_t(long p0) {
+ return __builtin_hlsl_elementwise_f16tof32(p0);
+ // expected-error at -1 {{1st argument must be a scalar or vector of unsigned integer types (was 'long')}}
+}
+
+float2 builtin_f16tof32_int2_to_float2_promotion(int2 p0) {
+ return __builtin_hlsl_elementwise_f16tof32(p0);
+ // expected-error at -1 {{1st argument must be a scalar or vector of unsigned integer types (was 'int2' (aka 'vector<int, 2>'))}}
+}
+
+float builtin_f16tof32_half(half p0) {
+ return __builtin_hlsl_elementwise_f16tof32(p0);
+ // expected-error at -1 {{1st argument must be a scalar or vector of unsigned integer types (was 'half')}}
+}
+
+float builtin_f16tof32_half4(half4 p0) {
+ return __builtin_hlsl_elementwise_f16tof32(p0);
+ // expected-error at -1 {{1st argument must be a scalar or vector of unsigned integer types (was 'half4' (aka 'vector<half, 4>'))}}
+}
+
+float builtin_f16tof32_float(float p0) {
+ return __builtin_hlsl_elementwise_f16tof32(p0);
+ // expected-error at -1 {{1st argument must be a scalar or vector of unsigned integer types (was 'float')}}
+}
+
+float builtin_f16tof32_double(double p0) {
+ return __builtin_hlsl_elementwise_f16tof32(p0);
+ // expected-error at -1 {{1st argument must be a scalar or vector of unsigned integer types (was 'double')}}
+}
+
+float f16tof32_too_few_arg() {
+ return f16tof32();
+ // expected-error at -1 {{no matching function for call to 'f16tof32'}}
+}
+
+float f16tof32_too_many_arg(uint p0) {
+ return f16tof32(p0, p0);
+ // expected-error at -1 {{no matching function for call to 'f16tof32'}}
+}
+
+float f16tof32_bool(bool p0) {
+ return f16tof32(p0);
+ // expected-error at -1 {{1st argument must be a scalar or vector of unsigned integer types (was 'bool')}}
+}
+
+float f16tof32_bool3(bool3 p0) {
+ return f16tof32(p0);
+ // expected-error at -1 {{1st argument must be a scalar or vector of unsigned integer types (was 'bool3' (aka 'vector<bool, 3>'))}}
+}
+
+
+float f16tof32_int16_t(short p0) {
+ return f16tof32(p0);
+ // expected-error at -1 {{1st argument must be a scalar or vector of unsigned integer types (was 'short')}}
+}
+
+float f16tof32_int16_t(unsigned short p0) {
+ return f16tof32(p0);
+ // expected-error at -1 {{incorrect number of bits in integer (expected 32 bits, have 16)}}
+}
+
+float f16tof32_int(int p0) {
+ return f16tof32(p0);
+ // expected-error at -1 {{1st argument must be a scalar or vector of unsigned integer types (was 'int')}}
+}
+
+float f16tof32_int64_t(long p0) {
+ return f16tof32(p0);
+ // expected-error at -1 {{1st argument must be a scalar or vector of unsigned integer types (was 'long')}}
+}
+
+float2 f16tof32_int2_to_float2_promotion(int3 p0) {
+ return f16tof32(p0);
+ // expected-error at -1 {{1st argument must be a scalar or vector of unsigned integer types (was 'int3' (aka 'vector<int, 3>'))}}
+}
+
+float f16tof32_half(half p0) {
+ return f16tof32(p0);
+ // expected-error at -1 {{1st argument must be a scalar or vector of unsigned integer types (was 'half')}}
+}
+
+float f16tof32_half2(half2 p0) {
+ return f16tof32(p0);
+ // expected-error at -1 {{1st argument must be a scalar or vector of unsigned integer types (was 'half2' (aka 'vector<half, 2>'))}}
+}
+
+float f16tof32_float(float p0) {
+ return f16tof32(p0);
+ // expected-error at -1 {{1st argument must be a scalar or vector of unsigned integer types (was 'float')}}
+}
+
+float f16tof32_double(double p0) {
+ return f16tof32(p0);
+ // expected-error at -1 {{1st argument must be a scalar or vector of unsigned integer types (was 'double')}}
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td
index d6b85630eb979..9924b905aee63 100644
--- a/llvm/include/llvm/IR/IntrinsicsDirectX.td
+++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -140,6 +140,9 @@ def int_dx_isinf : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i1
def int_dx_isnan : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
[llvm_anyfloat_ty], [IntrNoMem]>;
+def int_dx_legacyf16tof32 : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_float_ty>],
+ [llvm_anyint_ty], [IntrNoMem]>;
+
def int_dx_lerp : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, LLVMMatchType<0>,LLVMMatchType<0>],
[IntrNoMem]>;
diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
index bc51fb639fd75..f39c6cda2c579 100644
--- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td
+++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
@@ -200,4 +200,7 @@ def int_spv_resource_nonuniformindex
def int_spv_generic_cast_to_ptr_explicit
: DefaultAttrsIntrinsic<[llvm_anyptr_ty], [generic_ptr_ty],
[IntrNoMem, NoUndef<RetIndex>]>;
+
+ def int_spv_unpackhalf2x16 : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [llvm_i32_ty], [IntrNoMem]>;
+
}
diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index 7ae500a55b92d..67437f6969b27 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -1079,6 +1079,15 @@ def WaveActiveOp : DXILOp<119, waveActiveOp> {
let attributes = [Attributes<DXIL1_0, []>];
}
+def LegacyF16ToF32 : DXILOp<131, legacyF16ToF32> {
+ let Doc = "returns the float16 stored in the low-half of the uint converted "
+ "to a float";
+ let intrinsics = [IntrinSelect<int_dx_legacyf16tof32>];
+ let arguments = [Int32Ty];
+ let result = FloatTy;
+ let stages = [Stages<DXIL1_0, [all_stages]>];
+}
+
def WaveAllBitCount : DXILOp<135, waveAllOp> {
let Doc = "returns the count of bits set to 1 across the wave";
let intrinsics = [IntrinSelect<int_dx_wave_active_countbits>];
diff --git a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp
index 60dfd9650937c..6cacbf6564db2 100644
--- a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp
+++ b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp
@@ -29,11 +29,12 @@ bool DirectXTTIImpl::isTargetIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID,
int OpdIdx) const {
switch (ID) {
case Intrinsic::dx_asdouble:
- case Intrinsic::dx_isinf:
- case Intrinsic::dx_isnan:
case Intrinsic::dx_firstbitlow:
- case Intrinsic::dx_firstbituhigh:
case Intrinsic::dx_firstbitshigh:
+ case Intrinsic::dx_firstbituhigh:
+ case Intrinsic::dx_isinf:
+ case Intrinsic::dx_isnan:
+ case Intrinsic::dx_legacyf16tof32:
return OpdIdx == 0;
default:
return OpdIdx == -1;
@@ -50,6 +51,7 @@ bool DirectXTTIImpl::isTargetIntrinsicTriviallyScalarizable(
case Intrinsic::dx_frac:
case Intrinsic::dx_isinf:
case Intrinsic::dx_isnan:
+ case Intrinsic::dx_legacyf16tof32:
case Intrinsic::dx_rsqrt:
case Intrinsic::dx_saturate:
case Intrinsic::dx_splitdouble:
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index 3f0424f436c72..245e5a2894604 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -3516,6 +3516,10 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg,
case Intrinsic::spv_resource_nonuniformindex: {
return selectResourceNonUniformIndex(ResVReg, ResType, I);
}
+ case Intrinsic::spv_unpackhalf2x16: {
+ return selectExtInst(ResVReg, ResType, I, GL::UnpackHalf2x16);
+ }
+
default: {
std::string DiagMsg;
raw_string_ostream OS(DiagMsg);
diff --git a/llvm/test/CodeGen/DirectX/f16tof32.ll b/llvm/test/CodeGen/DirectX/f16tof32.ll
new file mode 100644
index 0000000000000..edc5c1942e8bd
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/f16tof32.ll
@@ -0,0 +1,57 @@
+; RUN: opt -S -dxil-intrinsic-expansion -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.9-library %s | FileCheck %s
+
+define hidden noundef nofpclass(nan inf) float @_Z11test_scalarj(i32 noundef %p0) local_unnamed_addr #0 {
+entry:
+ ; CHECK: [[UINT:%.*]] = call float @dx.op.legacyF16ToF32(i32 131, i32 %p0)
+ ; CHECK: ret float [[UINT]]
+ %hlsl.f16tof32 = tail call reassoc nnan ninf nsz arcp afn float @llvm.dx.legacyf16tof32.i32(i32 %p0)
+ ret float %hlsl.f16tof32
+}
+
+define hidden noundef nofpclass(nan inf) <2 x float> @_Z10test_uint2Dv2_j(<2 x i32> noundef %p0) local_unnamed_addr #0 {
+entry:
+ ; CHECK: [[UINT2_0:%.*]] = extractelement <2 x i32> %p0, i64 0
+ ; CHECK: [[FLOAT_0:%.*]] = call float @dx.op.legacyF16ToF32(i32 131, i32 [[UINT2_0]])
+ ; CHECK: [[UINT2_1:%.*]] = extractelement <2 x i32> %p0, i64 1
+ ; CHECK: [[FLOAT_1:%.*]] = call float @dx.op.legacyF16ToF32(i32 131, i32 [[UINT2_1]])
+ ; CHECK: [[FLOAT2_0:%.*]] = insertelement <2 x float> poison, float [[FLOAT_0]], i64 0
+ ; CHECK: [[FLOAT2_1:%.*]] = insertelement <2 x float> [[FLOAT2_0]], float [[FLOAT_1]], i64 1
+ ; CHECK: ret <2 x float> [[FLOAT2_1]]
+ %hlsl.f16tof32 = tail call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.dx.legacyf16tof32.v2i32(<2 x i32> %p0)
+ ret <2 x float> %hlsl.f16tof32
+}
+
+define hidden noundef nofpclass(nan inf) <3 x float> @_Z10test_uint3Dv3_j(<3 x i32> noundef %p0) local_unnamed_addr #0 {
+entry:
+ ; CHECK: [[UINT3_0:%.*]] = extractelement <3 x i32> %p0, i64 0
+ ; CHECK: [[FLOAT_0:%.*]] = call float @dx.op.legacyF16ToF32(i32 131, i32 [[UINT3_0]])
+ ; CHECK: [[UINT3_1:%.*]] = extractelement <3 x i32> %p0, i64 1
+ ; CHECK: [[FLOAT_1:%.*]] = call float @dx.op.legacyF16ToF32(i32 131, i32 [[UINT3_1]])
+ ; CHECK: [[UINT3_2:%.*]] = extractelement <3 x i32> %p0, i64 2
+ ; CHECK: [[FLOAT_2:%.*]] = call float @dx.op.legacyF16ToF32(i32 131, i32 [[UINT3_2]])
+ ; CHECK: [[FLOAT3_0:%.*]] = insertelement <3 x float> poison, float [[FLOAT_0]], i64 0
+ ; CHECK: [[FLOAT3_1:%.*]] = insertelement <3 x float> [[FLOAT3_0]], float [[FLOAT_1]], i64 1
+ ; CHECK: [[FLOAT3_2:%.*]] = insertelement <3 x float> [[FLOAT3_1]], float [[FLOAT_2]], i64 2
+ ; CHECK: ret <3 x float> [[FLOAT3_2]]
+ %hlsl.f16tof32 = tail call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.dx.legacyf16tof32.v3i32(<3 x i32> %p0)
+ ret <3 x float> %hlsl.f16tof32
+}
+
+define hidden noundef nofpclass(nan inf) <4 x float> @_Z10test_uint4Dv4_j(<4 x i32> noundef %p0) local_unnamed_addr #0 {
+entry:
+ ; CHECK: [[UINT4_0:%.*]] = extractelement <4 x i32> %p0, i64 0
+ ; CHECK: [[FLOAT_0:%.*]] = call float @dx.op.legacyF16ToF32(i32 131, i32 [[UINT4_0]])
+ ; CHECK: [[UINT4_1:%.*]] = extractelement <4 x i32> %p0, i64 1
+ ; CHECK: [[FLOAT_1:%.*]] = call float @dx.op.legacyF16ToF32(i32 131, i32 [[UINT4_1]])
+ ; CHECK: [[UINT4_2:%.*]] = extractelement <4 x i32> %p0, i64 2
+ ; CHECK: [[FLOAT_2:%.*]] = call float @dx.op.legacyF16ToF32(i32 131, i32 [[UINT4_2]])
+ ; CHECK: [[UINT4_3:%.*]] = extractelement <4 x i32> %p0, i64 3
+ ; CHECK: [[FLOAT_3:%.*]] = call float @dx.op.legacyF16ToF32(i32 131, i32 [[UINT4_3]])
+ ; CHECK: [[FLOAT4_0:%.*]] = insertelement <4 x float> poison, float [[FLOAT_0]], i64 0
+ ; CHECK: [[FLOAT4_1:%.*]] = insertelement <4 x float> [[FLOAT4_0]], float [[FLOAT_1]], i64 1
+ ; CHECK: [[FLOAT4_2:%.*]] = insertelement <4 x float> [[FLOAT4_1]], float [[FLOAT_2]], i64 2
+ ; CHECK: [[FLOAT4_3:%.*]] = insertelement <4 x float> [[FLOAT4_2]], float [[FLOAT_3]], i64 3
+ ; CHECK: ret <4 x float> [[FLOAT4_3]]
+ %hlsl.f16tof32 = tail call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.dx.legacyf16tof32.v4i32(<4 x i32> %p0)
+ ret <4 x float> %hlsl.f16tof32
+}
diff --git a/llvm/test/CodeGen/SPIRV/unpackfloat2x16.ll b/llvm/test/CodeGen/SPIRV/unpackfloat2x16.ll
new file mode 100644
index 0000000000000..6a9ce4515f5c0
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/unpackfloat2x16.ll
@@ -0,0 +1,18 @@
+; RUN: llc -O0 -verify-machineinstrs -mtriple=spirv-unknown-vulkan %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan %s -o - -filetype=obj | spirv-val %}
+
+; CHECK-DAG: [[SET:%.*]] = OpExtInstImport "GLSL.std.450"
+; CHECK-DAG: [[UINT:%.*]] = OpTypeInt 32 0
+; CHECK-DAG: [[FLOAT:%.*]] = OpTypeFloat 32
+; CHECK-DAG: [[FLOAT2:%.*]] = OpTypeVector [[FLOAT]] 2
+
+; CHECK: [[P0:%.*]] = OpFunctionParameter [[UINT]]
+; CHECK: [[UNPACK2:%.*]] = OpExtInst [[FLOAT2]] [[SET]] UnpackHalf2x16 [[P0]]
+; CHECK: [[UNPACK:%.*]] = OpCompositeExtract [[FLOAT]] [[UNPACK2]] 0
+; CHECK: OpReturnValue [[UNPACK]]
+define hidden spir_func noundef nofpclass(nan inf) float @_Z9test_funcj(i32 noundef %0) local_unnamed_addr #0 {
+ %2 = tail call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.spv.unpackhalf2x16.v2f32(i32 %0)
+ %3 = extractelement <2 x float> %2, i64 0
+ ret float %3
+}
+
More information about the llvm-commits
mailing list