[clang] [llvm] [DirectX] Add isinf f16 emulation for SM6.8 and lower (PR #156932)
Farzon Lotfi via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 4 10:24:45 PDT 2025
https://github.com/farzonl updated https://github.com/llvm/llvm-project/pull/156932
>From 8c3186557a2bc96e3f27a7dde10929990a38288a Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <farzonlotfi at microsoft.com>
Date: Thu, 4 Sep 2025 13:10:06 -0400
Subject: [PATCH] [DirectX] Add isinf f16 emulation for SM6.8 and lower
fixes #156068
- We needed to add a new sub arch to the target triple so we can test that
emulation does not happen when targeting SM6.9
- The HLSL toolchain needed to be updated to handle the conversion of
strings to enums for the new sub arch.
- The emulation is done in DXILIntrinsicExpansion.cpp and needs to be able to convert both llvm.is.fpclass and
llvm.dx.isinf to the proper emulation
- test updates in TargetParser/TripleTest.cpp, isinf.ll, and is_fpclass.ll
---
clang/lib/Driver/ToolChains/HLSL.cpp | 3 ++
llvm/include/llvm/TargetParser/Triple.h | 3 +-
.../Target/DirectX/DXILIntrinsicExpansion.cpp | 44 ++++++++++++++++++-
llvm/lib/TargetParser/Triple.cpp | 9 +++-
llvm/test/CodeGen/DirectX/is_fpclass.ll | 21 ++++++++-
llvm/test/CodeGen/DirectX/isinf.ll | 43 +++++++++++++++---
llvm/unittests/TargetParser/TripleTest.cpp | 13 ++++++
7 files changed, 125 insertions(+), 11 deletions(-)
diff --git a/clang/lib/Driver/ToolChains/HLSL.cpp b/clang/lib/Driver/ToolChains/HLSL.cpp
index 660661945d62a..559af32dc3808 100644
--- a/clang/lib/Driver/ToolChains/HLSL.cpp
+++ b/clang/lib/Driver/ToolChains/HLSL.cpp
@@ -132,6 +132,9 @@ std::optional<std::string> tryParseProfile(StringRef Profile) {
case 8:
SubArch = llvm::Triple::DXILSubArch_v1_8;
break;
+ case 9:
+ SubArch = llvm::Triple::DXILSubArch_v1_9;
+ break;
case OfflineLibMinor:
// Always consider minor version x as the latest supported DXIL version
SubArch = llvm::Triple::LatestDXILSubArch;
diff --git a/llvm/include/llvm/TargetParser/Triple.h b/llvm/include/llvm/TargetParser/Triple.h
index f85984ed4f328..8e12c6852075d 100644
--- a/llvm/include/llvm/TargetParser/Triple.h
+++ b/llvm/include/llvm/TargetParser/Triple.h
@@ -180,7 +180,8 @@ class Triple {
DXILSubArch_v1_6,
DXILSubArch_v1_7,
DXILSubArch_v1_8,
- LatestDXILSubArch = DXILSubArch_v1_8,
+ DXILSubArch_v1_9,
+ LatestDXILSubArch = DXILSubArch_v1_9,
};
enum VendorType {
UnknownVendor,
diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
index ee1db54446cb8..06aade7d0f1e9 100644
--- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
+++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
@@ -51,6 +51,43 @@ static bool resourceAccessNeeds64BitExpansion(Module *M, Type *OverloadTy,
return ScalarTy->isDoubleTy() || ScalarTy->isIntegerTy(64);
}
+static Value *expand16BitIsInf(CallInst *Orig) {
+ Module *M = Orig->getModule();
+ if (M->getTargetTriple().getDXILVersion() >= VersionTuple(1, 9))
+ return nullptr;
+
+ Value *Val = Orig->getOperand(0);
+ Type *ValTy = Val->getType();
+ if (!(ValTy->isHalfTy() ||
+ (ValTy->isVectorTy() &&
+ cast<FixedVectorType>(ValTy)->getElementType()->isHalfTy())))
+ return nullptr;
+
+ IRBuilder<> Builder(Orig);
+ Type *IType = Type::getInt16Ty(M->getContext());
+ Constant *PosInf =
+ ValTy->isVectorTy()
+ ? ConstantVector::getSplat(
+ ElementCount::getFixed(
+ cast<FixedVectorType>(ValTy)->getNumElements()),
+ ConstantInt::get(IType, 0x7c00))
+ : ConstantInt::get(IType, 0x7c00);
+
+ Constant *NegInf =
+ ValTy->isVectorTy()
+ ? ConstantVector::getSplat(
+ ElementCount::getFixed(
+ cast<FixedVectorType>(ValTy)->getNumElements()),
+ ConstantInt::get(IType, 0xfc00))
+ : ConstantInt::get(IType, 0xfc00);
+
+ Value *IVal = Builder.CreateBitCast(Val, PosInf->getType());
+ Value *B1 = Builder.CreateICmpEQ(IVal, PosInf);
+ Value *B2 = Builder.CreateICmpEQ(IVal, NegInf);
+ Value *B3 = Builder.CreateOr(B1, B2);
+ return B3;
+}
+
static bool isIntrinsicExpansion(Function &F) {
switch (F.getIntrinsicID()) {
case Intrinsic::abs:
@@ -68,6 +105,7 @@ static bool isIntrinsicExpansion(Function &F) {
case Intrinsic::dx_sclamp:
case Intrinsic::dx_nclamp:
case Intrinsic::dx_degrees:
+ case Intrinsic::dx_isinf:
case Intrinsic::dx_lerp:
case Intrinsic::dx_normalize:
case Intrinsic::dx_fdot:
@@ -301,9 +339,10 @@ static Value *expandIsFPClass(CallInst *Orig) {
auto *TCI = dyn_cast<ConstantInt>(T);
// These FPClassTest cases have DXIL opcodes, so they will be handled in
- // DXIL Op Lowering instead.
+ // DXIL Op Lowering instead for all non f16 cases.
switch (TCI->getZExtValue()) {
case FPClassTest::fcInf:
+ return expand16BitIsInf(Orig);
case FPClassTest::fcNan:
case FPClassTest::fcNormal:
case FPClassTest::fcFinite:
@@ -873,6 +912,9 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) {
case Intrinsic::dx_degrees:
Result = expandDegreesIntrinsic(Orig);
break;
+ case Intrinsic::dx_isinf:
+ Result = expand16BitIsInf(Orig);
+ break;
case Intrinsic::dx_lerp:
Result = expandLerpIntrinsic(Orig);
break;
diff --git a/llvm/lib/TargetParser/Triple.cpp b/llvm/lib/TargetParser/Triple.cpp
index ec15f235b8624..71517e5e9e832 100644
--- a/llvm/lib/TargetParser/Triple.cpp
+++ b/llvm/lib/TargetParser/Triple.cpp
@@ -158,6 +158,8 @@ StringRef Triple::getArchName(ArchType Kind, SubArchType SubArch) {
return "dxilv1.7";
case Triple::DXILSubArch_v1_8:
return "dxilv1.8";
+ case Triple::DXILSubArch_v1_9:
+ return "dxilv1.9";
default:
break;
}
@@ -650,6 +652,8 @@ static Triple::ArchType parseArch(StringRef ArchName) {
.Cases("dxil", "dxilv1.0", "dxilv1.1", "dxilv1.2", "dxilv1.3",
"dxilv1.4", "dxilv1.5", "dxilv1.6", "dxilv1.7",
"dxilv1.8", Triple::dxil)
+ // Note: Cases has max limit of 10.
+ .Case("dxilv1.9", Triple::dxil)
.Case("xtensa", Triple::xtensa)
.Default(Triple::UnknownArch);
@@ -842,6 +846,7 @@ static Triple::SubArchType parseSubArch(StringRef SubArchName) {
.EndsWith("v1.6", Triple::DXILSubArch_v1_6)
.EndsWith("v1.7", Triple::DXILSubArch_v1_7)
.EndsWith("v1.8", Triple::DXILSubArch_v1_8)
+ .EndsWith("v1.9", Triple::DXILSubArch_v1_9)
.Default(Triple::NoSubArch);
StringRef ARMSubArch = ARM::getCanonicalArchName(SubArchName);
@@ -1111,7 +1116,7 @@ static StringRef getDXILArchNameFromShaderModel(StringRef ShaderModelStr) {
VersionTuple Ver =
parseVersionFromName(ShaderModelStr.drop_front(strlen("shadermodel")));
// Default DXIL minor version when Shader Model version is anything other
- // than 6.[0...8] or 6.x (which translates to latest current SM version)
+ // than 6.[0...9] or 6.x (which translates to latest current SM version)
const unsigned SMMajor = 6;
if (!Ver.empty()) {
if (Ver.getMajor() == SMMajor) {
@@ -1135,6 +1140,8 @@ static StringRef getDXILArchNameFromShaderModel(StringRef ShaderModelStr) {
return Triple::getArchName(Triple::dxil, Triple::DXILSubArch_v1_7);
case 8:
return Triple::getArchName(Triple::dxil, Triple::DXILSubArch_v1_8);
+ case 9:
+ return Triple::getArchName(Triple::dxil, Triple::DXILSubArch_v1_9);
default:
report_fatal_error("Unsupported Shader Model version", false);
}
diff --git a/llvm/test/CodeGen/DirectX/is_fpclass.ll b/llvm/test/CodeGen/DirectX/is_fpclass.ll
index a628096aacd7d..1796e8bd794d8 100644
--- a/llvm/test/CodeGen/DirectX/is_fpclass.ll
+++ b/llvm/test/CodeGen/DirectX/is_fpclass.ll
@@ -1,5 +1,5 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-; RUN: opt -S -dxil-intrinsic-expansion -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+; RUN: opt -S -dxil-intrinsic-expansion -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.9-library %s | FileCheck %s --check-prefixes=CHECK,SM69CHECK
+; RUN: opt -S -dxil-intrinsic-expansion -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.8-library %s | FileCheck %s --check-prefixes=CHECK,SMOLDCHECK
define noundef i1 @isnegzero(float noundef %a) {
@@ -75,6 +75,23 @@ entry:
ret i1 %0
}
+define noundef i1 @isinfh(half noundef %a) {
+; CHECK-LABEL: define noundef i1 @isinfh(
+; CHECK-SAME: half noundef [[A:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; SM69CHECK-NEXT: [[ISINF:%.*]] = call i1 @dx.op.isSpecialFloat.f16(i32 9, half [[A]]) #[[ATTR0]]
+; SMOLDCHECK-NEXT: [[BITCAST:%.*]] = bitcast half %a to i16
+; SMOLDCHECK-NEXT: [[CMPHIGH:%.*]] = icmp eq i16 [[BITCAST]], 31744
+; SMOLDCHECK-NEXT: [[CMPLOW:%.*]] = icmp eq i16 [[BITCAST]], -1024
+; SMOLDCHECK-NEXT: [[OR:%.*]] = or i1 [[CMPHIGH]], [[CMPLOW]]
+; SMOLDCHECK-NEXT: ret i1 [[OR]]
+; SM69CHECK-NEXT: ret i1 [[ISINF]]
+;
+entry:
+ %0 = call i1 @llvm.is.fpclass.f16(half %a, i32 516)
+ ret i1 %0
+}
+
define noundef <2 x i1> @isinfv2(<2 x float> noundef %a) {
; CHECK-LABEL: define noundef <2 x i1> @isinfv2(
; CHECK-SAME: <2 x float> noundef [[A:%.*]]) {
diff --git a/llvm/test/CodeGen/DirectX/isinf.ll b/llvm/test/CodeGen/DirectX/isinf.ll
index 461553b533ae1..bf31363ee114c 100644
--- a/llvm/test/CodeGen/DirectX/isinf.ll
+++ b/llvm/test/CodeGen/DirectX/isinf.ll
@@ -1,4 +1,5 @@
-; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+; RUN: opt -S -dxil-intrinsic-expansion -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.9-library %s | FileCheck %s --check-prefixes=CHECK,SM69CHECK
+; RUN: opt -S -dxil-intrinsic-expansion -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.8-library %s | FileCheck %s --check-prefixes=CHECK,SMOLDCHECK
; Make sure dxil operation function calls for isinf are generated for float and half.
@@ -11,17 +12,47 @@ entry:
define noundef i1 @isinf_half(half noundef %a) {
entry:
- ; CHECK: call i1 @dx.op.isSpecialFloat.f16(i32 9, half %{{.*}}) #[[#ATTR]]
+ ; SM69CHECK: call i1 @dx.op.isSpecialFloat.f16(i32 9, half %{{.*}}) #[[#ATTR]]
+ ; SMOLDCHECK: [[BITCAST:%.*]] = bitcast half %a to i16
+ ; SMOLDCHECK: [[CMPHIGH:%.*]] = icmp eq i16 [[BITCAST]], 31744
+ ; SMOLDCHECK: [[CMPLOW:%.*]] = icmp eq i16 [[BITCAST]], -1024
+ ; SMOLDCHECK: [[OR:%.*]] = or i1 [[CMPHIGH]], [[CMPLOW]]
%dx.isinf = call i1 @llvm.dx.isinf.f16(half %a)
ret i1 %dx.isinf
}
define noundef <4 x i1> @isinf_half4(<4 x half> noundef %p0) {
entry:
- ; CHECK: call i1 @dx.op.isSpecialFloat.f16(i32 9, half
- ; CHECK: call i1 @dx.op.isSpecialFloat.f16(i32 9, half
- ; CHECK: call i1 @dx.op.isSpecialFloat.f16(i32 9, half
- ; CHECK: call i1 @dx.op.isSpecialFloat.f16(i32 9, half
+ ; SM69CHECK: call i1 @dx.op.isSpecialFloat.f16(i32 9, half
+ ; SM69CHECK: call i1 @dx.op.isSpecialFloat.f16(i32 9, half
+ ; SM69CHECK: call i1 @dx.op.isSpecialFloat.f16(i32 9, half
+ ; SM69CHECK: call i1 @dx.op.isSpecialFloat.f16(i32 9, half
+
+ ; SMOLDCHECK: [[ee0:%.*]] = extractelement <4 x half> %p0, i64 0
+ ; SMOLDCHECK: [[BITCAST0:%.*]] = bitcast half [[ee0]] to i16
+ ; SMOLDCHECK: [[ee1:%.*]] = extractelement <4 x half> %p0, i64 1
+ ; SMOLDCHECK: [[BITCAST1:%.*]] = bitcast half [[ee1]] to i16
+ ; SMOLDCHECK:[[ee2:%.*]] = extractelement <4 x half> %p0, i64 2
+ ; SMOLDCHECK: [[BITCAST2:%.*]] = bitcast half [[ee2]] to i16
+ ; SMOLDCHECK: [[ee3:%.*]] = extractelement <4 x half> %p0, i64 3
+ ; SMOLDCHECK: [[BITCAST3:%.*]] = bitcast half [[ee3]] to i16
+ ; SMOLDCHECK: [[ICMPHIGH0:%.*]] = icmp eq i16 [[BITCAST0]], 31744
+ ; SMOLDCHECK: [[ICMPHIGH1:%.*]] = icmp eq i16 [[BITCAST1]], 31744
+ ; SMOLDCHECK: [[ICMPHIGH2:%.*]] = icmp eq i16 [[BITCAST2]], 31744
+ ; SMOLDCHECK: [[ICMPHIGH3:%.*]] = icmp eq i16 [[BITCAST3]], 31744
+ ; SMOLDCHECK: [[ICMPLOW0:%.*]] = icmp eq i16 [[BITCAST0]], -1024
+ ; SMOLDCHECK: [[ICMPLOW1:%.*]] = icmp eq i16 [[BITCAST1]], -1024
+ ; SMOLDCHECK: [[ICMPLOW2:%.*]] = icmp eq i16 [[BITCAST2]], -1024
+ ; SMOLDCHECK: [[ICMPLOW3:%.*]] = icmp eq i16 [[BITCAST3]], -1024
+ ; SMOLDCHECK: [[OR0:%.*]] = or i1 [[ICMPHIGH0]], [[ICMPLOW0]]
+ ; SMOLDCHECK: [[OR1:%.*]] = or i1 [[ICMPHIGH1]], [[ICMPLOW1]]
+ ; SMOLDCHECK: [[OR2:%.*]] = or i1 [[ICMPHIGH2]], [[ICMPLOW2]]
+ ; SMOLDCHECK: [[OR3:%.*]] = or i1 [[ICMPHIGH3]], [[ICMPLOW3]]
+ ; SMOLDCHECK: %.upto019 = insertelement <4 x i1> poison, i1 [[OR0]], i64 0
+ ; SMOLDCHECK: %.upto120 = insertelement <4 x i1> %.upto019, i1 [[OR1]], i64 1
+ ; SMOLDCHECK: %.upto221 = insertelement <4 x i1> %.upto120, i1 [[OR2]], i64 2
+ ; SMOLDCHECK: %0 = insertelement <4 x i1> %.upto221, i1 [[OR3]], i64 3
+
%hlsl.isinf = call <4 x i1> @llvm.dx.isinf.v4f16(<4 x half> %p0)
ret <4 x i1> %hlsl.isinf
}
diff --git a/llvm/unittests/TargetParser/TripleTest.cpp b/llvm/unittests/TargetParser/TripleTest.cpp
index 7d07615d273d7..942912bcfe0e2 100644
--- a/llvm/unittests/TargetParser/TripleTest.cpp
+++ b/llvm/unittests/TargetParser/TripleTest.cpp
@@ -553,6 +553,13 @@ TEST(TripleTest, ParsedIDs) {
EXPECT_EQ(Triple::ShaderModel, T.getOS());
EXPECT_EQ(VersionTuple(1, 8), T.getDXILVersion());
+ T = Triple("dxilv1.9-unknown-shadermodel6.15-library");
+ EXPECT_EQ(Triple::dxil, T.getArch());
+ EXPECT_EQ(Triple::DXILSubArch_v1_9, T.getSubArch());
+ EXPECT_EQ(Triple::UnknownVendor, T.getVendor());
+ EXPECT_EQ(Triple::ShaderModel, T.getOS());
+ EXPECT_EQ(VersionTuple(1, 9), T.getDXILVersion());
+
T = Triple("x86_64-unknown-fuchsia");
EXPECT_EQ(Triple::x86_64, T.getArch());
EXPECT_EQ(Triple::UnknownVendor, T.getVendor());
@@ -1270,6 +1277,12 @@ TEST(TripleTest, ParsedIDs) {
EXPECT_EQ(Triple::UnknownVendor, T.getVendor());
EXPECT_EQ(Triple::UnknownOS, T.getOS());
+ T = Triple("dxilv1.9-unknown-unknown");
+ EXPECT_EQ(Triple::dxil, T.getArch());
+ EXPECT_EQ(Triple::DXILSubArch_v1_9, T.getSubArch());
+ EXPECT_EQ(Triple::UnknownVendor, T.getVendor());
+ EXPECT_EQ(Triple::UnknownOS, T.getOS());
+
// Check specification of unknown SubArch results in
// unknown architecture.
T = Triple("dxilv1.999-unknown-unknown");
More information about the llvm-commits
mailing list