[clang] [llvm] [DXIL] `exp`, `any`, `lerp`, & `rcp` Intrinsic Lowering (PR #84526)
Farzon Lotfi via cfe-commits
cfe-commits at lists.llvm.org
Fri Mar 8 09:58:01 PST 2024
https://github.com/farzonl created https://github.com/llvm/llvm-project/pull/84526
This change implements lowering for the `exp`, `any`, `lerp`, & `rcp` intrinsics (#70076, #70100, #70072, & #70102).
`CGBuiltin.cpp` - simplify `lerp` intrinsic
`IntrinsicsDirectX.td` - simplify `lerp` intrinsic
`SemaChecking.cpp` - remove unnecessary check
`DXILIntrinsicExpansion.*` - add intrinsic-to-instruction expansion cases
`DXILOpLowering.cpp` - make sure `DXILIntrinsicExpansion` happens first
`DirectX.h` - changes to support new pass
`DirectXTargetMachine.cpp` - changes to support new pass
From ad83fd46be2e1587a6cb0098467e18dc38612517 Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <farzonlotfi at microsoft.com>
Date: Thu, 7 Mar 2024 20:48:46 -0500
Subject: [PATCH] [DXIL] exp, any, lerp, & rcp Intrinsic Lowering. This change
implements lowering for #70076, #70100, #70072, & #70102. CGBuiltin.cpp -
simplify lerp intrinsic; IntrinsicsDirectX.td - simplify lerp intrinsic;
SemaChecking.cpp - remove unnecessary check; DXILIntrinsicExpansion.* - add
intrinsic-to-instruction expansion cases; DXILOpLowering.cpp - make sure
DXILIntrinsicExpansion happens first; DirectX.h - changes to support new pass;
DirectXTargetMachine.cpp - changes to support new pass
---
clang/lib/CodeGen/CGBuiltin.cpp | 35 +---
clang/lib/Sema/SemaChecking.cpp | 2 -
clang/test/CodeGenHLSL/builtins/lerp.hlsl | 13 +-
llvm/include/llvm/IR/IntrinsicsDirectX.td | 5 +-
llvm/lib/Target/DirectX/CMakeLists.txt | 1 +
.../Target/DirectX/DXILIntrinsicExpansion.cpp | 187 ++++++++++++++++++
.../Target/DirectX/DXILIntrinsicExpansion.h | 33 ++++
llvm/lib/Target/DirectX/DXILOpLowering.cpp | 6 +-
llvm/lib/Target/DirectX/DirectX.h | 6 +
.../Target/DirectX/DirectXTargetMachine.cpp | 2 +
llvm/test/CodeGen/DirectX/any.ll | 133 +++++++++++++
llvm/test/CodeGen/DirectX/exp-vec.ll | 23 +++
llvm/test/CodeGen/DirectX/exp.ll | 38 ++++
llvm/test/CodeGen/DirectX/lerp.ll | 64 ++++++
llvm/test/CodeGen/DirectX/rcp.ll | 63 ++++++
15 files changed, 564 insertions(+), 47 deletions(-)
create mode 100644 llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
create mode 100644 llvm/lib/Target/DirectX/DXILIntrinsicExpansion.h
create mode 100644 llvm/test/CodeGen/DirectX/any.ll
create mode 100644 llvm/test/CodeGen/DirectX/exp-vec.ll
create mode 100644 llvm/test/CodeGen/DirectX/exp.ll
create mode 100644 llvm/test/CodeGen/DirectX/lerp.ll
create mode 100644 llvm/test/CodeGen/DirectX/rcp.ll
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 20c35757939152..1d12237fb25e05 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -18009,38 +18009,11 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
Value *X = EmitScalarExpr(E->getArg(0));
Value *Y = EmitScalarExpr(E->getArg(1));
Value *S = EmitScalarExpr(E->getArg(2));
- llvm::Type *Xty = X->getType();
- llvm::Type *Yty = Y->getType();
- llvm::Type *Sty = S->getType();
- if (!Xty->isVectorTy() && !Yty->isVectorTy() && !Sty->isVectorTy()) {
- if (Xty->isFloatingPointTy()) {
- auto V = Builder.CreateFSub(Y, X);
- V = Builder.CreateFMul(S, V);
- return Builder.CreateFAdd(X, V, "dx.lerp");
- }
- llvm_unreachable("Scalar Lerp is only supported on floats.");
- }
- // A VectorSplat should have happened
- assert(Xty->isVectorTy() && Yty->isVectorTy() && Sty->isVectorTy() &&
- "Lerp of vector and scalar is not supported.");
-
- [[maybe_unused]] auto *XVecTy =
- E->getArg(0)->getType()->getAs<VectorType>();
- [[maybe_unused]] auto *YVecTy =
- E->getArg(1)->getType()->getAs<VectorType>();
- [[maybe_unused]] auto *SVecTy =
- E->getArg(2)->getType()->getAs<VectorType>();
- // A HLSLVectorTruncation should have happend
- assert(XVecTy->getNumElements() == YVecTy->getNumElements() &&
- XVecTy->getNumElements() == SVecTy->getNumElements() &&
- "Lerp requires vectors to be of the same size.");
- assert(XVecTy->getElementType()->isRealFloatingType() &&
- XVecTy->getElementType() == YVecTy->getElementType() &&
- XVecTy->getElementType() == SVecTy->getElementType() &&
- "Lerp requires float vectors to be of the same type.");
+ if (!E->getArg(0)->getType()->hasFloatingRepresentation())
+ llvm_unreachable("lerp operand must have a float representation");
return Builder.CreateIntrinsic(
- /*ReturnType=*/Xty, Intrinsic::dx_lerp, ArrayRef<Value *>{X, Y, S},
- nullptr, "dx.lerp");
+ /*ReturnType=*/X->getType(), Intrinsic::dx_lerp,
+ ArrayRef<Value *>{X, Y, S}, nullptr, "dx.lerp");
}
case Builtin::BI__builtin_hlsl_elementwise_frac: {
Value *Op0 = EmitScalarExpr(E->getArg(0));
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index a5f42b630c3fa2..8a2b7384a0b0d5 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -5300,8 +5300,6 @@ bool Sema::CheckHLSLBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
return true;
if (SemaBuiltinElementwiseTernaryMath(TheCall))
return true;
- if (CheckAllArgsHaveFloatRepresentation(this, TheCall))
- return true;
break;
}
case Builtin::BI__builtin_hlsl_mad: {
diff --git a/clang/test/CodeGenHLSL/builtins/lerp.hlsl b/clang/test/CodeGenHLSL/builtins/lerp.hlsl
index a6b3d9643d674c..38a71ed3bcec94 100644
--- a/clang/test/CodeGenHLSL/builtins/lerp.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/lerp.hlsl
@@ -6,13 +6,10 @@
// RUN: dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
// RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF
-// NATIVE_HALF: %3 = fsub half %1, %0
-// NATIVE_HALF: %4 = fmul half %2, %3
-// NATIVE_HALF: %dx.lerp = fadd half %0, %4
+
+// NATIVE_HALF: %dx.lerp = call half @llvm.dx.lerp.f16(half %0, half %1, half %2)
// NATIVE_HALF: ret half %dx.lerp
-// NO_HALF: %3 = fsub float %1, %0
-// NO_HALF: %4 = fmul float %2, %3
-// NO_HALF: %dx.lerp = fadd float %0, %4
+// NO_HALF: %dx.lerp = call float @llvm.dx.lerp.f32(float %0, float %1, float %2)
// NO_HALF: ret float %dx.lerp
half test_lerp_half(half p0) { return lerp(p0, p0, p0); }
@@ -34,9 +31,7 @@ half3 test_lerp_half3(half3 p0, half3 p1) { return lerp(p0, p0, p0); }
// NO_HALF: ret <4 x float> %dx.lerp
half4 test_lerp_half4(half4 p0, half4 p1) { return lerp(p0, p0, p0); }
-// CHECK: %3 = fsub float %1, %0
-// CHECK: %4 = fmul float %2, %3
-// CHECK: %dx.lerp = fadd float %0, %4
+// CHECK: %dx.lerp = call float @llvm.dx.lerp.f32(float %0, float %1, float %2)
// CHECK: ret float %dx.lerp
float test_lerp_float(float p0, float p1) { return lerp(p0, p0, p0); }
diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td
index 7229292e377a83..c3597432cfb974 100644
--- a/llvm/include/llvm/IR/IntrinsicsDirectX.td
+++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -29,10 +29,7 @@ def int_dx_dot :
def int_dx_frac : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
-def int_dx_lerp :
- Intrinsic<[LLVMScalarOrSameVectorWidth<0, LLVMVectorElementType<0>>],
- [llvm_anyvector_ty, LLVMScalarOrSameVectorWidth<0, LLVMVectorElementType<0>>,LLVMScalarOrSameVectorWidth<0, LLVMVectorElementType<0>>],
- [IntrNoMem, IntrWillReturn] >;
+def int_dx_lerp : Intrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, LLVMMatchType<0>,LLVMMatchType<0>], [IntrNoMem, IntrWillReturn] >;
def int_dx_imad : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>;
def int_dx_umad : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>;
diff --git a/llvm/lib/Target/DirectX/CMakeLists.txt b/llvm/lib/Target/DirectX/CMakeLists.txt
index bf93280779bf8b..4c70b3f9230edb 100644
--- a/llvm/lib/Target/DirectX/CMakeLists.txt
+++ b/llvm/lib/Target/DirectX/CMakeLists.txt
@@ -19,6 +19,7 @@ add_llvm_target(DirectXCodeGen
DirectXSubtarget.cpp
DirectXTargetMachine.cpp
DXContainerGlobals.cpp
+ DXILIntrinsicExpansion.cpp
DXILMetadata.cpp
DXILOpBuilder.cpp
DXILOpLowering.cpp
diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
new file mode 100644
index 00000000000000..54a01d20e20548
--- /dev/null
+++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
@@ -0,0 +1,187 @@
+//===- DXILIntrinsicExpansion.cpp - Prepare LLVM Module for DXIL encoding--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file This file contains DXIL intrinsic expansions for those that don't have
+// opcodes in DirectX Intermediate Language (DXIL).
+//===----------------------------------------------------------------------===//
+
+#include "DXILIntrinsicExpansion.h"
+#include "DirectX.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsDirectX.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/ErrorHandling.h"
+
+#define DEBUG_TYPE "dxil-intrinsic-expansion"
+#define M_LOG2E_F 1.44269504088896340735992468100189214f
+
+using namespace llvm;
+
+static bool isIntrinsicExpansion(Function &F) {
+ switch (F.getIntrinsicID()) {
+ case Intrinsic::exp:
+ case Intrinsic::dx_any:
+ case Intrinsic::dx_lerp:
+ case Intrinsic::dx_rcp:
+ return true;
+ }
+ return false;
+}
+
+static bool expandExpIntrinsic(CallInst *Orig) {
+ Value *X = Orig->getOperand(0);
+ IRBuilder<> Builder(Orig->getParent());
+ Builder.SetInsertPoint(Orig);
+ Type *Ty = X->getType();
+ Type *EltTy = Ty->getScalarType();
+ Constant *Log2eConst =
+ Ty->isVectorTy()
+ ? ConstantVector::getSplat(
+ ElementCount::getFixed(
+ dyn_cast<FixedVectorType>(Ty)->getNumElements()),
+ ConstantFP::get(EltTy, M_LOG2E_F))
+ : ConstantFP::get(EltTy, M_LOG2E_F);
+ Value *NewX = Builder.CreateFMul(Log2eConst, X);
+ auto *Exp2Call =
+ Builder.CreateIntrinsic(Ty, Intrinsic::exp2, {NewX}, nullptr, "dx.exp2");
+ Exp2Call->setTailCall(Orig->isTailCall());
+ Exp2Call->setAttributes(Orig->getAttributes());
+ Orig->replaceAllUsesWith(Exp2Call);
+ Orig->eraseFromParent();
+ return true;
+}
+
+static bool expandAnyIntrinsic(CallInst *Orig) {
+ Value *X = Orig->getOperand(0);
+ IRBuilder<> Builder(Orig->getParent());
+ Builder.SetInsertPoint(Orig);
+ Type *Ty = X->getType();
+ Type *EltTy = Ty->getScalarType();
+
+ if (!Ty->isVectorTy()) {
+ Value *Cond = EltTy->isFloatingPointTy()
+ ? Builder.CreateFCmpUNE(X, ConstantFP::get(EltTy, 0))
+ : Builder.CreateICmpNE(X, ConstantInt::get(EltTy, 0));
+ Orig->replaceAllUsesWith(Cond);
+ } else {
+ auto *XVec = dyn_cast<FixedVectorType>(Ty);
+ Value *Cond =
+ EltTy->isFloatingPointTy()
+ ? Builder.CreateFCmpUNE(
+ X, ConstantVector::getSplat(
+ ElementCount::getFixed(XVec->getNumElements()),
+ ConstantFP::get(EltTy, 0)))
+ : Builder.CreateICmpNE(
+ X, ConstantVector::getSplat(
+ ElementCount::getFixed(XVec->getNumElements()),
+ ConstantInt::get(EltTy, 0)));
+ Value *Result = Builder.CreateExtractElement(Cond, (uint64_t)0);
+ for (unsigned I = 1; I < XVec->getNumElements(); I++) {
+ Value *Elt = Builder.CreateExtractElement(Cond, I);
+ Result = Builder.CreateOr(Result, Elt);
+ }
+ Orig->replaceAllUsesWith(Result);
+ }
+ Orig->eraseFromParent();
+ return true;
+}
+
+static bool expandLerpIntrinsic(CallInst *Orig) {
+ Value *X = Orig->getOperand(0);
+ Value *Y = Orig->getOperand(1);
+ Value *S = Orig->getOperand(2);
+ IRBuilder<> Builder(Orig->getParent());
+ Builder.SetInsertPoint(Orig);
+ auto *V = Builder.CreateFSub(Y, X);
+ V = Builder.CreateFMul(S, V);
+ auto *Result = Builder.CreateFAdd(X, V, "dx.lerp");
+ Orig->replaceAllUsesWith(Result);
+ Orig->eraseFromParent();
+ return true;
+}
+
+static bool expandReciprocalIntrinsic(CallInst *Orig) {
+ Value *X = Orig->getOperand(0);
+ IRBuilder<> Builder(Orig->getParent());
+ Builder.SetInsertPoint(Orig);
+ Type *Ty = X->getType();
+ Type *EltTy = Ty->getScalarType();
+ Constant *One =
+ Ty->isVectorTy()
+ ? ConstantVector::getSplat(
+ ElementCount::getFixed(
+ dyn_cast<FixedVectorType>(Ty)->getNumElements()),
+ ConstantFP::get(EltTy, 1.0))
+ : ConstantFP::get(EltTy, 1.0);
+ auto *Result = Builder.CreateFDiv(One, X, "dx.rcp");
+ Orig->replaceAllUsesWith(Result);
+ Orig->eraseFromParent();
+ return true;
+}
+
+static bool expandIntrinsic(Function &F, CallInst *Orig) {
+ switch (F.getIntrinsicID()) {
+ case Intrinsic::exp:
+ return expandExpIntrinsic(Orig);
+ case Intrinsic::dx_any:
+ return expandAnyIntrinsic(Orig);
+ case Intrinsic::dx_lerp:
+ return expandLerpIntrinsic(Orig);
+ case Intrinsic::dx_rcp:
+ return expandReciprocalIntrinsic(Orig);
+ }
+ return false;
+}
+
+static bool intrinsicExpansion(Module &M) {
+ for (auto &F : make_early_inc_range(M.functions())) {
+ if (!isIntrinsicExpansion(F))
+ continue;
+ bool IntrinsicExpanded = false;
+ for (User *U : make_early_inc_range(F.users())) {
+ auto *IntrinsicCall = dyn_cast<CallInst>(U);
+ if (!IntrinsicCall)
+ continue;
+ IntrinsicExpanded = expandIntrinsic(F, IntrinsicCall);
+ }
+ if (F.user_empty() && IntrinsicExpanded)
+ F.eraseFromParent();
+ }
+ return true;
+}
+
+PreservedAnalyses DXILIntrinsicExpansion::run(Module &M,
+ ModuleAnalysisManager &) {
+ if (intrinsicExpansion(M))
+ return PreservedAnalyses::none();
+ return PreservedAnalyses::all();
+}
+
+bool DXILIntrinsicExpansionLegacy::runOnModule(Module &M) {
+ return intrinsicExpansion(M);
+}
+
+char DXILIntrinsicExpansionLegacy::ID = 0;
+
+INITIALIZE_PASS_BEGIN(DXILIntrinsicExpansionLegacy, DEBUG_TYPE,
+ "DXIL Intrinsic Expansion", false, false)
+INITIALIZE_PASS_END(DXILIntrinsicExpansionLegacy, DEBUG_TYPE,
+ "DXIL Intrinsic Expansion", false, false)
+
+ModulePass *llvm::createDXILIntrinsicExpansionLegacyPass() {
+ return new DXILIntrinsicExpansionLegacy();
+}
diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.h b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.h
new file mode 100644
index 00000000000000..c86681af7a3712
--- /dev/null
+++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.h
@@ -0,0 +1,33 @@
+//===- DXILIntrinsicExpansion.h - Prepare LLVM Module for DXIL encoding----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_TARGET_DIRECTX_DXILINTRINSICEXPANSION_H
+#define LLVM_TARGET_DIRECTX_DXILINTRINSICEXPANSION_H
+
+#include "DXILResource.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/Pass.h"
+
+namespace llvm {
+
+/// A pass that transforms DXIL Intrinsics that don't have DXIL opCodes
+class DXILIntrinsicExpansion : public PassInfoMixin<DXILIntrinsicExpansion> {
+public:
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &);
+};
+
+class DXILIntrinsicExpansionLegacy : public ModulePass {
+
+public:
+ bool runOnModule(Module &M) override;
+ DXILIntrinsicExpansionLegacy() : ModulePass(ID) {}
+
+ static char ID; // Pass identification.
+};
+} // namespace llvm
+
+#endif // LLVM_TARGET_DIRECTX_DXILINTRINSICEXPANSION_H
diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
index 6b649b76beecdf..e5c2042e7d16ae 100644
--- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "DXILConstants.h"
+#include "DXILIntrinsicExpansion.h"
#include "DXILOpBuilder.h"
#include "DirectX.h"
#include "llvm/ADT/SmallVector.h"
@@ -94,9 +95,12 @@ class DXILOpLoweringLegacy : public ModulePass {
DXILOpLoweringLegacy() : ModulePass(ID) {}
static char ID; // Pass identification.
+ void getAnalysisUsage(llvm::AnalysisUsage &AU) const override {
+ // Specify the passes that your pass depends on
+ AU.addRequired<DXILIntrinsicExpansionLegacy>();
+ }
};
char DXILOpLoweringLegacy::ID = 0;
-
} // end anonymous namespace
INITIALIZE_PASS_BEGIN(DXILOpLoweringLegacy, DEBUG_TYPE, "DXIL Op Lowering",
diff --git a/llvm/lib/Target/DirectX/DirectX.h b/llvm/lib/Target/DirectX/DirectX.h
index eaecc3ac280c4c..11b5412c21d783 100644
--- a/llvm/lib/Target/DirectX/DirectX.h
+++ b/llvm/lib/Target/DirectX/DirectX.h
@@ -28,6 +28,12 @@ void initializeDXILPrepareModulePass(PassRegistry &);
/// Pass to convert modules into DXIL-compatable modules
ModulePass *createDXILPrepareModulePass();
+/// Initializer for DXIL Intrinsic Expansion
+void initializeDXILIntrinsicExpansionLegacyPass(PassRegistry &);
+
+/// Pass to expand intrinsic operations that lack DXIL opCodes
+ModulePass *createDXILIntrinsicExpansionLegacyPass();
+
/// Initializer for DXILOpLowering
void initializeDXILOpLoweringLegacyPass(PassRegistry &);
diff --git a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
index 06938f8c74f155..03c825b3977db3 100644
--- a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
+++ b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
@@ -39,6 +39,7 @@ using namespace llvm;
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeDirectXTarget() {
RegisterTargetMachine<DirectXTargetMachine> X(getTheDirectXTarget());
auto *PR = PassRegistry::getPassRegistry();
+ initializeDXILIntrinsicExpansionLegacyPass(*PR);
initializeDXILPrepareModulePass(*PR);
initializeEmbedDXILPassPass(*PR);
initializeWriteDXILPassPass(*PR);
@@ -76,6 +77,7 @@ class DirectXPassConfig : public TargetPassConfig {
FunctionPass *createTargetRegisterAllocator(bool) override { return nullptr; }
void addCodeGenPrepare() override {
+ addPass(createDXILIntrinsicExpansionLegacyPass());
addPass(createDXILOpLoweringLegacyPass());
addPass(createDXILPrepareModulePass());
addPass(createDXILTranslateMetadataPass());
diff --git a/llvm/test/CodeGen/DirectX/any.ll b/llvm/test/CodeGen/DirectX/any.ll
new file mode 100644
index 00000000000000..516afa101e948d
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/any.ll
@@ -0,0 +1,133 @@
+; RUN: opt -S -dxil-op-lower < %s | FileCheck %s
+
+; Make sure dxil operation function calls for any are generated for float and half.
+
+
+target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64"
+target triple = "dxil-pc-shadermodel6.7-library"
+
+
+; CHECK:icmp ne i1 %{{.*}}, false
+; Function Attrs: noinline nounwind optnone
+define noundef i1 @any_bool(i1 noundef %p0) #0 {
+entry:
+ %p0.addr = alloca i8, align 1
+ %frombool = zext i1 %p0 to i8
+ store i8 %frombool, ptr %p0.addr, align 1
+ %0 = load i8, ptr %p0.addr, align 1
+ %tobool = trunc i8 %0 to i1
+ %dx.any = call i1 @llvm.dx.any.i1(i1 %tobool)
+ ret i1 %dx.any
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn
+declare i1 @llvm.dx.any.i1(i1) #1
+
+; CHECK:icmp ne i64 %{{.*}}, 0
+; Function Attrs: noinline nounwind optnone
+define noundef i1 @any_int64_t(i64 noundef %p0) #0 {
+entry:
+ %p0.addr = alloca i64, align 8
+ store i64 %p0, ptr %p0.addr, align 8
+ %0 = load i64, ptr %p0.addr, align 8
+ %dx.any = call i1 @llvm.dx.any.i64(i64 %0)
+ ret i1 %dx.any
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn
+declare i1 @llvm.dx.any.i64(i64) #1
+
+; CHECK:icmp ne i32 %{{.*}}, 0
+; Function Attrs: noinline nounwind optnone
+define noundef i1 @any_int(i32 noundef %p0) #0 {
+entry:
+ %p0.addr = alloca i32, align 4
+ store i32 %p0, ptr %p0.addr, align 4
+ %0 = load i32, ptr %p0.addr, align 4
+ %dx.any = call i1 @llvm.dx.any.i32(i32 %0)
+ ret i1 %dx.any
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn
+declare i1 @llvm.dx.any.i32(i32) #1
+
+; CHECK:icmp ne i16 %{{.*}}, 0
+; Function Attrs: noinline nounwind optnone
+define noundef i1 @any_int16_t(i16 noundef %p0) #0 {
+entry:
+ %p0.addr = alloca i16, align 2
+ store i16 %p0, ptr %p0.addr, align 2
+ %0 = load i16, ptr %p0.addr, align 2
+ %dx.any = call i1 @llvm.dx.any.i16(i16 %0)
+ ret i1 %dx.any
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn
+declare i1 @llvm.dx.any.i16(i16) #1
+
+; CHECK:fcmp une double %{{.*}}, 0.000000e+00
+; Function Attrs: noinline nounwind optnone
+define noundef i1 @any_double(double noundef %p0) #0 {
+entry:
+ %p0.addr = alloca double, align 8
+ store double %p0, ptr %p0.addr, align 8
+ %0 = load double, ptr %p0.addr, align 8
+ %dx.any = call i1 @llvm.dx.any.f64(double %0)
+ ret i1 %dx.any
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn
+declare i1 @llvm.dx.any.f64(double) #1
+
+; CHECK:fcmp une float %{{.*}}, 0.000000e+00
+; Function Attrs: noinline nounwind optnone
+define noundef i1 @any_float(float noundef %p0) #0 {
+entry:
+ %p0.addr = alloca float, align 4
+ store float %p0, ptr %p0.addr, align 4
+ %0 = load float, ptr %p0.addr, align 4
+ %dx.any = call i1 @llvm.dx.any.f32(float %0)
+ ret i1 %dx.any
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn
+declare i1 @llvm.dx.any.f32(float) #1
+
+; CHECK:fcmp une half %{{.*}}, 0xH0000
+; Function Attrs: noinline nounwind optnone
+define noundef i1 @any_half(half noundef %p0) #0 {
+entry:
+ %p0.addr = alloca half, align 2
+ store half %p0, ptr %p0.addr, align 2
+ %0 = load half, ptr %p0.addr, align 2
+ %dx.any = call i1 @llvm.dx.any.f16(half %0)
+ ret i1 %dx.any
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn
+declare i1 @llvm.dx.any.f16(half) #1
+
+; CHECK:icmp ne <4 x i1> %extractvec, zeroinitialize
+; CHECK:extractelement <4 x i1> %{{.*}}, i64 0
+; CHECK:extractelement <4 x i1> %{{.*}}, i64 1
+; CHECK:or i1 %{{.*}}, %{{.*}}
+; CHECK:extractelement <4 x i1> %{{.*}}, i64 2
+; CHECK:or i1 %{{.*}}, %{{.*}}
+; CHECK:extractelement <4 x i1> %{{.*}}, i64 3
+; CHECK:or i1 %{{.*}}, %{{.*}}
+; Function Attrs: noinline nounwind optnone
+define noundef i1 @any_bool4(<4 x i1> noundef %p0) #0 {
+entry:
+ %p0.addr = alloca i8, align 1
+ %insertvec = shufflevector <4 x i1> %p0, <4 x i1> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
+ %0 = bitcast <8 x i1> %insertvec to i8
+ store i8 %0, ptr %p0.addr, align 1
+ %load_bits = load i8, ptr %p0.addr, align 1
+ %1 = bitcast i8 %load_bits to <8 x i1>
+ %extractvec = shufflevector <8 x i1> %1, <8 x i1> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %dx.any = call i1 @llvm.dx.any.v4i1(<4 x i1> %extractvec)
+ ret i1 %dx.any
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn
+declare i1 @llvm.dx.any.v4i1(<4 x i1>) #1
diff --git a/llvm/test/CodeGen/DirectX/exp-vec.ll b/llvm/test/CodeGen/DirectX/exp-vec.ll
new file mode 100644
index 00000000000000..0f5577870cd89a
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/exp-vec.ll
@@ -0,0 +1,23 @@
+; RUN: opt -S -dxil-intrinsic-expansion < %s | FileCheck %s
+
+; Make sure dxil operation function calls for exp are generated for float and half.
+
+
+
+target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64"
+target triple = "dxil-pc-shadermodel6.7-library"
+
+; CHECK:fmul <4 x float> <float 0x3FF7154760000000, float 0x3FF7154760000000, float 0x3FF7154760000000, float 0x3FF7154760000000>, %{{.*}}
+; CHECK:call <4 x float> @llvm.exp2.v4f32(<4 x float> %{{.*}})
+; Function Attrs: noinline nounwind optnone
+define noundef <4 x float> @exp_float4(<4 x float> noundef %p0) #0 {
+entry:
+ %p0.addr = alloca <4 x float>, align 16
+ store <4 x float> %p0, ptr %p0.addr, align 16
+ %0 = load <4 x float>, ptr %p0.addr, align 16
+ %elt.exp = call <4 x float> @llvm.exp.v4f32(<4 x float> %0)
+ ret <4 x float> %elt.exp
+}
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare <4 x float> @llvm.exp.v4f32(<4 x float>) #1
diff --git a/llvm/test/CodeGen/DirectX/exp.ll b/llvm/test/CodeGen/DirectX/exp.ll
new file mode 100644
index 00000000000000..c2a39e4f5bde1d
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/exp.ll
@@ -0,0 +1,38 @@
+; RUN: opt -S -dxil-op-lower < %s | FileCheck %s
+
+; Make sure dxil operation function calls for exp are generated for float and half.
+
+
+
+target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64"
+target triple = "dxil-pc-shadermodel6.7-library"
+
+; CHECK:fmul float 0x3FF7154760000000, %{{.*}}
+; CHECK:call float @dx.op.unary.f32(i32 21, float %{{.*}})
+; Function Attrs: noinline nounwind optnone
+define noundef float @exp_float(float noundef %a) #0 {
+entry:
+ %a.addr = alloca float, align 4
+ store float %a, ptr %a.addr, align 4
+ %0 = load float, ptr %a.addr, align 4
+ %elt.exp = call float @llvm.exp.f32(float %0)
+ ret float %elt.exp
+}
+
+; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn
+declare float @llvm.exp.f32(float) #1
+
+; CHECK:fmul half 0xH3DC5, %{{.*}}
+; CHECK:call half @dx.op.unary.f16(i32 21, half %{{.*}})
+; Function Attrs: noinline nounwind optnone
+define noundef half @exp_half(half noundef %a) #0 {
+entry:
+ %a.addr = alloca half, align 2
+ store half %a, ptr %a.addr, align 2
+ %0 = load half, ptr %a.addr, align 2
+ %elt.exp = call half @llvm.exp.f16(half %0)
+ ret half %elt.exp
+}
+
+; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn
+declare half @llvm.exp.f16(half) #1
diff --git a/llvm/test/CodeGen/DirectX/lerp.ll b/llvm/test/CodeGen/DirectX/lerp.ll
new file mode 100644
index 00000000000000..7af7d5e18c4255
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/lerp.ll
@@ -0,0 +1,64 @@
+; RUN: opt -S -dxil-op-lower < %s | FileCheck %s
+
+; Make sure dxil operation function calls for lerp are generated for float and half.
+
+target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64"
+target triple = "dxil-pc-shadermodel6.7-library"
+
+; CHECK:fsub half %{{.*}}, %{{.*}}
+; CHECK:fmul half %{{.*}}, %{{.*}}
+; CHECK:fadd half %{{.*}}, %{{.*}}
+; Function Attrs: noinline nounwind optnone
+define noundef half @lerp_half(half noundef %p0) #0 {
+entry:
+ %p0.addr = alloca half, align 2
+ store half %p0, ptr %p0.addr, align 2
+ %0 = load half, ptr %p0.addr, align 2
+ %1 = load half, ptr %p0.addr, align 2
+ %2 = load half, ptr %p0.addr, align 2
+ %dx.lerp = call half @llvm.dx.lerp.f16(half %0, half %1, half %2)
+ ret half %dx.lerp
+}
+
+; Function Attrs: nounwind willreturn memory(none)
+declare half @llvm.dx.lerp.f16(half, half, half) #1
+
+; CHECK:fsub float %{{.*}}, %{{.*}}
+; CHECK:fmul float %{{.*}}, %{{.*}}
+; CHECK:fadd float %{{.*}}, %{{.*}}
+; Function Attrs: noinline nounwind optnone
+define noundef float @lerp_float(float noundef %p0, float noundef %p1) #0 {
+entry:
+ %p1.addr = alloca float, align 4
+ %p0.addr = alloca float, align 4
+ store float %p1, ptr %p1.addr, align 4
+ store float %p0, ptr %p0.addr, align 4
+ %0 = load float, ptr %p0.addr, align 4
+ %1 = load float, ptr %p0.addr, align 4
+ %2 = load float, ptr %p0.addr, align 4
+ %dx.lerp = call float @llvm.dx.lerp.f32(float %0, float %1, float %2)
+ ret float %dx.lerp
+}
+
+; Function Attrs: nounwind willreturn memory(none)
+declare float @llvm.dx.lerp.f32(float, float, float) #1
+
+; CHECK:fsub <4 x float> %{{.*}}, %{{.*}}
+; CHECK:fmul <4 x float> %{{.*}}, %{{.*}}
+; CHECK:fadd <4 x float> %{{.*}}, %{{.*}}
+; Function Attrs: noinline nounwind optnone
+define noundef <4 x float> @lerp_float4(<4 x float> noundef %p0, <4 x float> noundef %p1) #0 {
+entry:
+ %p1.addr = alloca <4 x float>, align 16
+ %p0.addr = alloca <4 x float>, align 16
+ store <4 x float> %p1, ptr %p1.addr, align 16
+ store <4 x float> %p0, ptr %p0.addr, align 16
+ %0 = load <4 x float>, ptr %p0.addr, align 16
+ %1 = load <4 x float>, ptr %p0.addr, align 16
+ %2 = load <4 x float>, ptr %p0.addr, align 16
+ %dx.lerp = call <4 x float> @llvm.dx.lerp.v4f32(<4 x float> %0, <4 x float> %1, <4 x float> %2)
+ ret <4 x float> %dx.lerp
+}
+
+; Function Attrs: nounwind willreturn memory(none)
+declare <4 x float> @llvm.dx.lerp.v4f32(<4 x float>, <4 x float>, <4 x float>) #1
diff --git a/llvm/test/CodeGen/DirectX/rcp.ll b/llvm/test/CodeGen/DirectX/rcp.ll
new file mode 100644
index 00000000000000..2cc80b6f2c4784
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/rcp.ll
@@ -0,0 +1,63 @@
+; RUN: opt -S -dxil-op-lower < %s | FileCheck %s
+
+; Make sure dxil operation function calls for rcp are generated for float, double, and half.
+
+
+target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64"
+target triple = "dxil-pc-shadermodel6.7-library"
+
+; CHECK:fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %{{.*}}
+; Function Attrs: noinline nounwind optnone
+define noundef <4 x float> @rcp_float4(<4 x float> noundef %p0) #0 {
+entry:
+ %p0.addr = alloca <4 x float>, align 16
+ store <4 x float> %p0, ptr %p0.addr, align 16
+ %0 = load <4 x float>, ptr %p0.addr, align 16
+ %dx.rcp = call <4 x float> @llvm.dx.rcp.v4f32(<4 x float> %0)
+ ret <4 x float> %dx.rcp
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn
+declare <4 x float> @llvm.dx.rcp.v4f32(<4 x float>) #1
+
+; CHECK:fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, %{{.*}}
+; Function Attrs: noinline nounwind optnone
+define noundef <4 x double> @rcp_double4(<4 x double> noundef %p0) #0 {
+entry:
+ %p0.addr = alloca <4 x double>, align 16
+ store <4 x double> %p0, ptr %p0.addr, align 16
+ %0 = load <4 x double>, ptr %p0.addr, align 16
+ %dx.rcp = call <4 x double> @llvm.dx.rcp.v4f64(<4 x double> %0)
+ ret <4 x double> %dx.rcp
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn
+declare <4 x double> @llvm.dx.rcp.v4f64(<4 x double>) #1
+
+; CHECK:fdiv <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, %{{.*}}
+; Function Attrs: noinline nounwind optnone
+define noundef <4 x half> @rcp_half4(<4 x half> noundef %p0) #0 {
+entry:
+ %p0.addr = alloca <4 x half>, align 16
+ store <4 x half> %p0, ptr %p0.addr, align 16
+ %0 = load <4 x half>, ptr %p0.addr, align 16
+ %dx.rcp = call <4 x half> @llvm.dx.rcp.v4f16(<4 x half> %0)
+ ret <4 x half> %dx.rcp
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn
+declare <4 x half> @llvm.dx.rcp.v4f16(<4 x half>) #1
+
+; CHECK:fdiv half 0xH3C00, %{{.*}}
+; Function Attrs: noinline nounwind optnone
+define noundef half @rcp_half(half noundef %p0) #0 {
+entry:
+ %p0.addr = alloca half, align 2
+ store half %p0, ptr %p0.addr, align 2
+ %0 = load half, ptr %p0.addr, align 2
+ %dx.rcp = call half @llvm.dx.rcp.f16(half %0)
+ ret half %dx.rcp
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn
+declare half @llvm.dx.rcp.f16(half) #1
More information about the cfe-commits
mailing list