[llvm] [LegacyPM][DirectX] Add legacy scalarizer back for use in the DirectX backend (PR #107427)
Farzon Lotfi via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 11 17:09:04 PDT 2024
https://github.com/farzonl updated https://github.com/llvm/llvm-project/pull/107427
>From 4ae12ca7c2dea0246c887bbacaa6c20a20aaaf36 Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <farzon at farzon.org>
Date: Thu, 29 Aug 2024 15:07:02 -0400
Subject: [PATCH 1/3] [LegacyPM][DirectX] Add the scalarizer pass for DXIL
legalization
---
llvm/include/llvm/InitializePasses.h | 1 +
llvm/include/llvm/LinkAllPasses.h | 1 +
.../llvm/Transforms/Scalar/Scalarizer.h | 14 ++++++++
llvm/lib/Target/DirectX/DXILOpLowering.cpp | 2 ++
.../Target/DirectX/DirectXTargetMachine.cpp | 4 +++
llvm/lib/Transforms/Scalar/Scalar.cpp | 1 +
llvm/lib/Transforms/Scalar/Scalarizer.cpp | 32 ++++++++++++++++++-
llvm/test/CodeGen/DirectX/sin.ll | 16 +++++-----
8 files changed, 62 insertions(+), 9 deletions(-)
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index 6605c6fde92510..f8b9f42d9dbecb 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -276,6 +276,7 @@ void initializeSafepointIRVerifierPass(PassRegistry &);
void initializeSelectOptimizePass(PassRegistry &);
void initializeScalarEvolutionWrapperPassPass(PassRegistry &);
void initializeScalarizeMaskedMemIntrinLegacyPassPass(PassRegistry &);
+void initializeScalarizerLegacyPassPass(PassRegistry&);
void initializeScavengerTestPass(PassRegistry &);
void initializeScopedNoAliasAAWrapperPassPass(PassRegistry &);
void initializeSeparateConstOffsetFromGEPLegacyPassPass(PassRegistry &);
diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h
index 1da02153d846f1..92b59a66567c95 100644
--- a/llvm/include/llvm/LinkAllPasses.h
+++ b/llvm/include/llvm/LinkAllPasses.h
@@ -130,6 +130,7 @@ struct ForcePassLinking {
(void)llvm::createLowerAtomicPass();
(void)llvm::createLoadStoreVectorizerPass();
(void)llvm::createPartiallyInlineLibCallsPass();
+ (void)llvm::createScalarizerPass();
(void)llvm::createSeparateConstOffsetFromGEPPass();
(void)llvm::createSpeculativeExecutionPass();
(void)llvm::createSpeculativeExecutionIfHasBranchDivergencePass();
diff --git a/llvm/include/llvm/Transforms/Scalar/Scalarizer.h b/llvm/include/llvm/Transforms/Scalar/Scalarizer.h
index 45e25cbf282149..7454f00c2ea35b 100644
--- a/llvm/include/llvm/Transforms/Scalar/Scalarizer.h
+++ b/llvm/include/llvm/Transforms/Scalar/Scalarizer.h
@@ -19,6 +19,7 @@
#define LLVM_TRANSFORMS_SCALAR_SCALARIZER_H
#include "llvm/IR/PassManager.h"
+#include "llvm/Pass.h"
#include <optional>
namespace llvm {
@@ -50,6 +51,19 @@ class ScalarizerPass : public PassInfoMixin<ScalarizerPass> {
void setScalarizeLoadStore(bool Value) { Options.ScalarizeLoadStore = Value; }
void setScalarizeMinBits(unsigned Value) { Options.ScalarizeMinBits = Value; }
};
+
+/// Create a legacy pass manager instance of the Scalarizer pass
+FunctionPass *createScalarizerPass();
+
+class ScalarizerLegacyPass : public FunctionPass {
+public:
+ static char ID;
+ ScalarizerPassOptions Options;
+ ScalarizerLegacyPass();
+ bool runOnFunction(Function &F) override;
+ void getAnalysisUsage(AnalysisUsage& AU) const override;
+};
+
}
#endif /* LLVM_TRANSFORMS_SCALAR_SCALARIZER_H */
diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
index d98d0bfde04fc6..32126612d14811 100644
--- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
@@ -24,6 +24,7 @@
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Transforms/Scalar/Scalarizer.h"
#define DEBUG_TYPE "dxil-op-lower"
@@ -521,6 +522,7 @@ class DXILOpLoweringLegacy : public ModulePass {
static char ID; // Pass identification.
void getAnalysisUsage(llvm::AnalysisUsage &AU) const override {
AU.addRequired<DXILIntrinsicExpansionLegacy>();
+ AU.addRequired<ScalarizerLegacyPass>();
AU.addRequired<DXILResourceWrapperPass>();
AU.addPreserved<DXILResourceWrapperPass>();
}
diff --git a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
index a29fc210421637..531e74eda04bcc 100644
--- a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
+++ b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
@@ -26,6 +26,7 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/InitializePasses.h"
#include "llvm/IR/IRPrintingPasses.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/MC/MCSectionDXContainer.h"
@@ -36,6 +37,7 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Transforms/Scalar/Scalarizer.h"
#include <optional>
using namespace llvm;
@@ -44,6 +46,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeDirectXTarget() {
RegisterTargetMachine<DirectXTargetMachine> X(getTheDirectXTarget());
auto *PR = PassRegistry::getPassRegistry();
initializeDXILIntrinsicExpansionLegacyPass(*PR);
+ initializeScalarizerLegacyPassPass(*PR);
initializeDXILPrepareModulePass(*PR);
initializeEmbedDXILPassPass(*PR);
initializeWriteDXILPassPass(*PR);
@@ -83,6 +86,7 @@ class DirectXPassConfig : public TargetPassConfig {
FunctionPass *createTargetRegisterAllocator(bool) override { return nullptr; }
void addCodeGenPrepare() override {
addPass(createDXILIntrinsicExpansionLegacyPass());
+ addPass(createScalarizerPass());
addPass(createDXILOpLoweringLegacyPass());
addPass(createDXILFinalizeLinkageLegacyPass());
addPass(createDXILTranslateMetadataLegacyPass());
diff --git a/llvm/lib/Transforms/Scalar/Scalar.cpp b/llvm/lib/Transforms/Scalar/Scalar.cpp
index 7aeee1d31f7e79..fa6e671830d962 100644
--- a/llvm/lib/Transforms/Scalar/Scalar.cpp
+++ b/llvm/lib/Transforms/Scalar/Scalar.cpp
@@ -21,6 +21,7 @@ using namespace llvm;
void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeConstantHoistingLegacyPassPass(Registry);
initializeDCELegacyPassPass(Registry);
+ initializeScalarizerLegacyPassPass(Registry);
initializeGVNLegacyPassPass(Registry);
initializeEarlyCSELegacyPassPass(Registry);
initializeEarlyCSEMemSSALegacyPassPass(Registry);
diff --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
index 2bed3480da1cda..ad441914428c4b 100644
--- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
@@ -36,6 +36,7 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -339,9 +340,25 @@ class ScalarizerVisitor : public InstVisitor<ScalarizerVisitor, bool> {
const bool ScalarizeLoadStore;
const unsigned ScalarizeMinBits;
};
-
} // end anonymous namespace
+ScalarizerLegacyPass::ScalarizerLegacyPass() : FunctionPass(ID) {
+ Options.ScalarizeVariableInsertExtract = true;
+ Options.ScalarizeLoadStore = true;
+}
+
+void ScalarizerLegacyPass::getAnalysisUsage(AnalysisUsage& AU) const {
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+}
+
+char ScalarizerLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(ScalarizerLegacyPass, "scalarizer",
+ "Scalarize vector operations", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_END(ScalarizerLegacyPass, "scalarizer",
+ "Scalarize vector operations", false, false)
+
Scatterer::Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
const VectorSplit &VS, ValueVector *cachePtr)
: BB(bb), BBI(bbi), V(v), VS(VS), CachePtr(cachePtr) {
@@ -414,6 +431,19 @@ Value *Scatterer::operator[](unsigned Frag) {
return CV[Frag];
}
+bool ScalarizerLegacyPass::runOnFunction(Function &F) {
+ if (skipFunction(F))
+ return false;
+
+ DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ ScalarizerVisitor Impl(DT, Options);
+ return Impl.visit(F);
+}
+
+FunctionPass *llvm::createScalarizerPass() {
+ return new ScalarizerLegacyPass();
+}
+
bool ScalarizerVisitor::visit(Function &F) {
assert(Gathered.empty() && Scattered.empty());
diff --git a/llvm/test/CodeGen/DirectX/sin.ll b/llvm/test/CodeGen/DirectX/sin.ll
index f309a36c6b8e6b..79143bfa0a5298 100644
--- a/llvm/test/CodeGen/DirectX/sin.ll
+++ b/llvm/test/CodeGen/DirectX/sin.ll
@@ -7,19 +7,19 @@
; Function Attrs: noinline nounwind optnone
define noundef float @sin_float(float noundef %a) #0 {
entry:
- %a.addr = alloca float, align 4
- store float %a, ptr %a.addr, align 4
- %0 = load float, ptr %a.addr, align 4
- %1 = call float @llvm.sin.f32(float %0)
+ %1 = call float @llvm.sin.f32(float %a)
ret float %1
}
; Function Attrs: noinline nounwind optnone
define noundef half @sin_half(half noundef %a) #0 {
entry:
- %a.addr = alloca half, align 2
- store half %a, ptr %a.addr, align 2
- %0 = load half, ptr %a.addr, align 2
- %1 = call half @llvm.sin.f16(half %0)
+ %1 = call half @llvm.sin.f16(half %a)
ret half %1
}
+
+define noundef <4 x float> @sin_float4(<4 x float> noundef %a) #0 {
+entry:
+ %2 = call <4 x float> @llvm.sin.v4f32(<4 x float> %a)
+ ret <4 x float> %2
+}
>From c554589afef069a3e957f1316898f1883e0ab636 Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <farzon at farzon.org>
Date: Wed, 4 Sep 2024 12:50:59 -0400
Subject: [PATCH 2/3] - Update tests - Modify constructor to take options
---
llvm/include/llvm/InitializePasses.h | 2 +-
.../llvm/Transforms/Scalar/Scalarizer.h | 15 ++----
llvm/lib/Target/DirectX/DXILOpLowering.cpp | 2 -
.../Target/DirectX/DirectXTargetMachine.cpp | 7 ++-
llvm/lib/Transforms/Scalar/Scalarizer.cpp | 27 ++++++----
llvm/test/CodeGen/DirectX/acos.ll | 25 ++++++++--
llvm/test/CodeGen/DirectX/asin.ll | 25 ++++++++--
llvm/test/CodeGen/DirectX/atan.ll | 25 ++++++++--
llvm/test/CodeGen/DirectX/ceil.ll | 21 +++++++-
llvm/test/CodeGen/DirectX/cos.ll | 21 +++++++-
llvm/test/CodeGen/DirectX/cosh.ll | 25 ++++++++--
llvm/test/CodeGen/DirectX/exp2.ll | 50 +++++++++++--------
llvm/test/CodeGen/DirectX/fabs.ll | 22 +++++++-
llvm/test/CodeGen/DirectX/floor.ll | 23 ++++++++-
llvm/test/CodeGen/DirectX/isinf.ll | 24 ++++-----
llvm/test/CodeGen/DirectX/reversebits.ll | 21 +++++++-
llvm/test/CodeGen/DirectX/round.ll | 22 +++++++-
llvm/test/CodeGen/DirectX/saturate.ll | 17 +++----
llvm/test/CodeGen/DirectX/scalar-store.ll | 17 +++++++
.../DirectX/scalarization_pass_order.ll | 45 +++++++++++++++++
llvm/test/CodeGen/DirectX/sin.ll | 30 ++++++++---
llvm/test/CodeGen/DirectX/sinh.ll | 25 ++++++++--
llvm/test/CodeGen/DirectX/sqrt.ll | 25 ++++++++--
llvm/test/CodeGen/DirectX/tan.ll | 21 +++++++-
llvm/test/CodeGen/DirectX/tanh.ll | 21 +++++++-
llvm/test/CodeGen/DirectX/trunc.ll | 21 +++++++-
llvm/tools/opt/optdriver.cpp | 1 +
27 files changed, 472 insertions(+), 108 deletions(-)
create mode 100644 llvm/test/CodeGen/DirectX/scalar-store.ll
create mode 100644 llvm/test/CodeGen/DirectX/scalarization_pass_order.ll
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index f8b9f42d9dbecb..4352099d6dbb99 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -276,7 +276,7 @@ void initializeSafepointIRVerifierPass(PassRegistry &);
void initializeSelectOptimizePass(PassRegistry &);
void initializeScalarEvolutionWrapperPassPass(PassRegistry &);
void initializeScalarizeMaskedMemIntrinLegacyPassPass(PassRegistry &);
-void initializeScalarizerLegacyPassPass(PassRegistry&);
+void initializeScalarizerLegacyPassPass(PassRegistry &);
void initializeScavengerTestPass(PassRegistry &);
void initializeScopedNoAliasAAWrapperPassPass(PassRegistry &);
void initializeSeparateConstOffsetFromGEPLegacyPassPass(PassRegistry &);
diff --git a/llvm/include/llvm/Transforms/Scalar/Scalarizer.h b/llvm/include/llvm/Transforms/Scalar/Scalarizer.h
index 7454f00c2ea35b..4d2a1a2f889a3c 100644
--- a/llvm/include/llvm/Transforms/Scalar/Scalarizer.h
+++ b/llvm/include/llvm/Transforms/Scalar/Scalarizer.h
@@ -19,12 +19,12 @@
#define LLVM_TRANSFORMS_SCALAR_SCALARIZER_H
#include "llvm/IR/PassManager.h"
-#include "llvm/Pass.h"
#include <optional>
namespace llvm {
class Function;
+class FunctionPass;
struct ScalarizerPassOptions {
// These options correspond 1:1 to cl::opt options defined in
@@ -53,17 +53,8 @@ class ScalarizerPass : public PassInfoMixin<ScalarizerPass> {
};
/// Create a legacy pass manager instance of the Scalarizer pass
-FunctionPass *createScalarizerPass();
-
-class ScalarizerLegacyPass : public FunctionPass {
-public:
- static char ID;
- ScalarizerPassOptions Options;
- ScalarizerLegacyPass();
- bool runOnFunction(Function &F) override;
- void getAnalysisUsage(AnalysisUsage& AU) const override;
-};
-
+FunctionPass *createScalarizerPass(
+ const ScalarizerPassOptions &Options = ScalarizerPassOptions());
}
#endif /* LLVM_TRANSFORMS_SCALAR_SCALARIZER_H */
diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
index 32126612d14811..d98d0bfde04fc6 100644
--- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
@@ -24,7 +24,6 @@
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Transforms/Scalar/Scalarizer.h"
#define DEBUG_TYPE "dxil-op-lower"
@@ -522,7 +521,6 @@ class DXILOpLoweringLegacy : public ModulePass {
static char ID; // Pass identification.
void getAnalysisUsage(llvm::AnalysisUsage &AU) const override {
AU.addRequired<DXILIntrinsicExpansionLegacy>();
- AU.addRequired<ScalarizerLegacyPass>();
AU.addRequired<DXILResourceWrapperPass>();
AU.addPreserved<DXILResourceWrapperPass>();
}
diff --git a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
index 531e74eda04bcc..f021e24ac7e26e 100644
--- a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
+++ b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
@@ -26,9 +26,9 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
-#include "llvm/InitializePasses.h"
#include "llvm/IR/IRPrintingPasses.h"
#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/InitializePasses.h"
#include "llvm/MC/MCSectionDXContainer.h"
#include "llvm/MC/SectionKind.h"
#include "llvm/MC/TargetRegistry.h"
@@ -86,7 +86,10 @@ class DirectXPassConfig : public TargetPassConfig {
FunctionPass *createTargetRegisterAllocator(bool) override { return nullptr; }
void addCodeGenPrepare() override {
addPass(createDXILIntrinsicExpansionLegacyPass());
- addPass(createScalarizerPass());
+ ScalarizerPassOptions DxilScalarOptions;
+ // The only non-default option we need to set is ScalarizeLoadStore.
+ DxilScalarOptions.ScalarizeLoadStore = true;
+ addPass(createScalarizerPass(DxilScalarOptions));
addPass(createDXILOpLoweringLegacyPass());
addPass(createDXILFinalizeLinkageLegacyPass());
addPass(createDXILTranslateMetadataLegacyPass());
diff --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
index ad441914428c4b..01d24335df2262 100644
--- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
@@ -340,16 +340,25 @@ class ScalarizerVisitor : public InstVisitor<ScalarizerVisitor, bool> {
const bool ScalarizeLoadStore;
const unsigned ScalarizeMinBits;
};
+
+class ScalarizerLegacyPass : public FunctionPass {
+public:
+ static char ID;
+ ScalarizerPassOptions Options;
+ ScalarizerLegacyPass() : FunctionPass(ID), Options() {}
+ ScalarizerLegacyPass(const ScalarizerPassOptions &Options);
+ bool runOnFunction(Function &F) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+};
+
} // end anonymous namespace
-ScalarizerLegacyPass::ScalarizerLegacyPass() : FunctionPass(ID) {
- Options.ScalarizeVariableInsertExtract = true;
- Options.ScalarizeLoadStore = true;
-}
+ScalarizerLegacyPass::ScalarizerLegacyPass(const ScalarizerPassOptions &Options)
+ : FunctionPass(ID), Options(Options) {}
-void ScalarizerLegacyPass::getAnalysisUsage(AnalysisUsage& AU) const {
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addPreserved<DominatorTreeWrapperPass>();
+void ScalarizerLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
}
char ScalarizerLegacyPass::ID = 0;
@@ -440,8 +449,8 @@ bool ScalarizerLegacyPass::runOnFunction(Function &F) {
return Impl.visit(F);
}
-FunctionPass *llvm::createScalarizerPass() {
- return new ScalarizerLegacyPass();
+FunctionPass *llvm::createScalarizerPass(const ScalarizerPassOptions &Options) {
+ return new ScalarizerLegacyPass(Options);
}
bool ScalarizerVisitor::visit(Function &F) {
diff --git a/llvm/test/CodeGen/DirectX/acos.ll b/llvm/test/CodeGen/DirectX/acos.ll
index cc32182395627c..f4a10eb368ebfb 100644
--- a/llvm/test/CodeGen/DirectX/acos.ll
+++ b/llvm/test/CodeGen/DirectX/acos.ll
@@ -1,20 +1,39 @@
-; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
; Make sure dxil operation function calls for acos are generated for float and half.
-define noundef float @tan_float(float noundef %a) {
+define noundef float @acos_float(float noundef %a) {
entry:
; CHECK:call float @dx.op.unary.f32(i32 15, float %{{.*}})
%elt.acos = call float @llvm.acos.f32(float %a)
ret float %elt.acos
}
-define noundef half @tan_half(half noundef %a) {
+define noundef half @acos_half(half noundef %a) {
entry:
; CHECK:call half @dx.op.unary.f16(i32 15, half %{{.*}})
%elt.acos = call half @llvm.acos.f16(half %a)
ret half %elt.acos
}
+define noundef <4 x float> @acos_float4(<4 x float> noundef %a) {
+entry:
+ ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
+ ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 15, float [[ee0]])
+ ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
+ ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 15, float [[ee1]])
+ ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
+ ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 15, float [[ee2]])
+ ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
+ ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 15, float [[ee3]])
+ ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3
+ %2 = call <4 x float> @llvm.acos.v4f32(<4 x float> %a)
+ ret <4 x float> %2
+}
+
declare half @llvm.acos.f16(half)
declare float @llvm.acos.f32(float)
+declare <4 x float> @llvm.acos.v4f32(<4 x float>)
diff --git a/llvm/test/CodeGen/DirectX/asin.ll b/llvm/test/CodeGen/DirectX/asin.ll
index 06e3bab545a6aa..bd948f593c24e2 100644
--- a/llvm/test/CodeGen/DirectX/asin.ll
+++ b/llvm/test/CodeGen/DirectX/asin.ll
@@ -1,20 +1,39 @@
-; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
; Make sure dxil operation function calls for asin are generated for float and half.
-define noundef float @tan_float(float noundef %a) {
+define noundef float @asin_float(float noundef %a) {
entry:
; CHECK:call float @dx.op.unary.f32(i32 16, float %{{.*}})
%elt.asin = call float @llvm.asin.f32(float %a)
ret float %elt.asin
}
-define noundef half @tan_half(half noundef %a) {
+define noundef half @asin_half(half noundef %a) {
entry:
; CHECK:call half @dx.op.unary.f16(i32 16, half %{{.*}})
%elt.asin = call half @llvm.asin.f16(half %a)
ret half %elt.asin
}
+define noundef <4 x float> @asin_float4(<4 x float> noundef %a) {
+entry:
+ ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
+ ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 16, float [[ee0]])
+ ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
+ ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 16, float [[ee1]])
+ ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
+ ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 16, float [[ee2]])
+ ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
+ ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 16, float [[ee3]])
+ ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3
+ %2 = call <4 x float> @llvm.asin.v4f32(<4 x float> %a)
+ ret <4 x float> %2
+}
+
declare half @llvm.asin.f16(half)
declare float @llvm.asin.f32(float)
+declare <4 x float> @llvm.asin.v4f32(<4 x float>)
diff --git a/llvm/test/CodeGen/DirectX/atan.ll b/llvm/test/CodeGen/DirectX/atan.ll
index d7c4cd00e286a0..58899ab49bdb8e 100644
--- a/llvm/test/CodeGen/DirectX/atan.ll
+++ b/llvm/test/CodeGen/DirectX/atan.ll
@@ -1,20 +1,39 @@
-; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
; Make sure dxil operation function calls for atan are generated for float and half.
-define noundef float @tan_float(float noundef %a) {
+define noundef float @atan_float(float noundef %a) {
entry:
; CHECK:call float @dx.op.unary.f32(i32 17, float %{{.*}})
%elt.atan = call float @llvm.atan.f32(float %a)
ret float %elt.atan
}
-define noundef half @tan_half(half noundef %a) {
+define noundef half @atan_half(half noundef %a) {
entry:
; CHECK:call half @dx.op.unary.f16(i32 17, half %{{.*}})
%elt.atan = call half @llvm.atan.f16(half %a)
ret half %elt.atan
}
+define noundef <4 x float> @atan_float4(<4 x float> noundef %a) {
+entry:
+ ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
+ ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 17, float [[ee0]])
+ ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
+ ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 17, float [[ee1]])
+ ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
+ ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 17, float [[ee2]])
+ ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
+ ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 17, float [[ee3]])
+ ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3
+ %2 = call <4 x float> @llvm.atan.v4f32(<4 x float> %a)
+ ret <4 x float> %2
+}
+
declare half @llvm.atan.f16(half)
declare float @llvm.atan.f32(float)
+declare <4 x float> @llvm.atan.v4f32(<4 x float>)
diff --git a/llvm/test/CodeGen/DirectX/ceil.ll b/llvm/test/CodeGen/DirectX/ceil.ll
index 48bc5495a8e051..bd6e747c2fbf5f 100644
--- a/llvm/test/CodeGen/DirectX/ceil.ll
+++ b/llvm/test/CodeGen/DirectX/ceil.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
; Make sure dxil operation function calls for ceil are generated for float and half.
@@ -16,5 +16,24 @@ entry:
ret half %elt.ceil
}
+define noundef <4 x float> @ceil_float4(<4 x float> noundef %a) {
+entry:
+ ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
+ ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 28, float [[ee0]])
+ ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
+ ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 28, float [[ee1]])
+ ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
+ ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 28, float [[ee2]])
+ ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
+ ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 28, float [[ee3]])
+ ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3
+ %2 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %a)
+ ret <4 x float> %2
+}
+
declare half @llvm.ceil.f16(half)
declare float @llvm.ceil.f32(float)
+declare <4 x float> @llvm.ceil.v4f32(<4 x float>)
diff --git a/llvm/test/CodeGen/DirectX/cos.ll b/llvm/test/CodeGen/DirectX/cos.ll
index 72f4bfca23f9d5..85f5db25570b90 100644
--- a/llvm/test/CodeGen/DirectX/cos.ll
+++ b/llvm/test/CodeGen/DirectX/cos.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
; Make sure dxil operation function calls for cos are generated for float and half.
@@ -16,5 +16,24 @@ entry:
ret half %elt.cos
}
+define noundef <4 x float> @cos_float4(<4 x float> noundef %a) #0 {
+entry:
+ ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
+ ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 12, float [[ee0]])
+ ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
+ ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 12, float [[ee1]])
+ ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
+ ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 12, float [[ee2]])
+ ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
+ ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 12, float [[ee3]])
+ ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3
+ %2 = call <4 x float> @llvm.cos.v4f32(<4 x float> %a)
+ ret <4 x float> %2
+}
+
declare half @llvm.cos.f16(half)
declare float @llvm.cos.f32(float)
+declare <4 x float> @llvm.cos.v4f32(<4 x float>)
diff --git a/llvm/test/CodeGen/DirectX/cosh.ll b/llvm/test/CodeGen/DirectX/cosh.ll
index 91aaf893f3997c..670a8a3eae0864 100644
--- a/llvm/test/CodeGen/DirectX/cosh.ll
+++ b/llvm/test/CodeGen/DirectX/cosh.ll
@@ -1,20 +1,39 @@
-; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
; Make sure dxil operation function calls for cosh are generated for float and half.
-define noundef float @tan_float(float noundef %a) {
+define noundef float @cosh_float(float noundef %a) {
entry:
; CHECK:call float @dx.op.unary.f32(i32 18, float %{{.*}})
%elt.cosh = call float @llvm.cosh.f32(float %a)
ret float %elt.cosh
}
-define noundef half @tan_half(half noundef %a) {
+define noundef half @cosh_half(half noundef %a) {
entry:
; CHECK:call half @dx.op.unary.f16(i32 18, half %{{.*}})
%elt.cosh = call half @llvm.cosh.f16(half %a)
ret half %elt.cosh
}
+define noundef <4 x float> @cosh_float4(<4 x float> noundef %a) #0 {
+entry:
+ ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
+ ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 18, float [[ee0]])
+ ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
+ ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 18, float [[ee1]])
+ ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
+ ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 18, float [[ee2]])
+ ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
+ ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 18, float [[ee3]])
+ ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3
+ %2 = call <4 x float> @llvm.cosh.v4f32(<4 x float> %a)
+ ret <4 x float> %2
+}
+
declare half @llvm.cosh.f16(half)
declare float @llvm.cosh.f32(float)
+declare <4 x float> @llvm.cosh.v4f32(<4 x float>)
diff --git a/llvm/test/CodeGen/DirectX/exp2.ll b/llvm/test/CodeGen/DirectX/exp2.ll
index b70b87dedc4d1e..6d16af6a5413e0 100644
--- a/llvm/test/CodeGen/DirectX/exp2.ll
+++ b/llvm/test/CodeGen/DirectX/exp2.ll
@@ -1,31 +1,39 @@
-; RUN: opt -S -dxil-op-lower < %s | FileCheck %s
+; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.7-library %s | FileCheck %s
; Make sure dxil operation function calls for exp2 are generated for float and half.
-; CHECK:call float @dx.op.unary.f32(i32 21, float %{{.*}})
-; CHECK:call half @dx.op.unary.f16(i32 21, half %{{.*}})
-target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64"
-target triple = "dxil-pc-shadermodel6.7-library"
-
-; Function Attrs: noinline nounwind optnone
-define noundef float @exp2_float(float noundef %a) #0 {
+define noundef float @exp2_float(float noundef %a) {
entry:
- %a.addr = alloca float, align 4
- store float %a, ptr %a.addr, align 4
- %0 = load float, ptr %a.addr, align 4
- %elt.exp2 = call float @llvm.exp2.f32(float %0)
+ ; CHECK:call float @dx.op.unary.f32(i32 21, float %{{.*}})
+ %elt.exp2 = call float @llvm.exp2.f32(float %a)
ret float %elt.exp2
}
-; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn
-declare float @llvm.exp2.f32(float) #1
-
-; Function Attrs: noinline nounwind optnone
-define noundef half @exp2_half(half noundef %a) #0 {
+define noundef half @exp2_half(half noundef %a) {
entry:
- %a.addr = alloca half, align 2
- store half %a, ptr %a.addr, align 2
- %0 = load half, ptr %a.addr, align 2
- %elt.exp2 = call half @llvm.exp2.f16(half %0)
+ ; CHECK:call half @dx.op.unary.f16(i32 21, half %{{.*}})
+ %elt.exp2 = call half @llvm.exp2.f16(half %a)
ret half %elt.exp2
}
+
+define noundef <4 x float> @exp2_float4(<4 x float> noundef %a) {
+entry:
+ ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
+ ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 21, float [[ee0]])
+ ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
+ ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 21, float [[ee1]])
+ ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
+ ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 21, float [[ee2]])
+ ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
+ ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 21, float [[ee3]])
+ ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3
+ %2 = call <4 x float> @llvm.exp2.v4f32(<4 x float> %a)
+ ret <4 x float> %2
+}
+
+declare float @llvm.exp2.f32(float)
+declare half @llvm.exp2.f16(half)
+declare <4 x float> @llvm.exp2.v4f32(<4 x float> %a)
diff --git a/llvm/test/CodeGen/DirectX/fabs.ll b/llvm/test/CodeGen/DirectX/fabs.ll
index becbdf8d68aeb1..6d903f1c927ace 100644
--- a/llvm/test/CodeGen/DirectX/fabs.ll
+++ b/llvm/test/CodeGen/DirectX/fabs.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
; Make sure dxil operation function calls for abs are generated for float, half, and double.
@@ -27,6 +27,26 @@ entry:
ret double %elt.abs
}
+; CHECK-LABEL: fabs_float4
+define noundef <4 x float> @fabs_float4(<4 x float> noundef %a) {
+entry:
+ ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
+ ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 6, float [[ee0]])
+ ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
+ ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 6, float [[ee1]])
+ ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
+ ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 6, float [[ee2]])
+ ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
+ ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 6, float [[ee3]])
+ ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3
+ %2 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %a)
+ ret <4 x float> %2
+}
+
declare half @llvm.fabs.f16(half)
declare float @llvm.fabs.f32(float)
declare double @llvm.fabs.f64(double)
+declare <4 x float> @llvm.fabs.v4f32(<4 x float>)
diff --git a/llvm/test/CodeGen/DirectX/floor.ll b/llvm/test/CodeGen/DirectX/floor.ll
index f79f160e51e3b2..8ad81e1459a5ba 100644
--- a/llvm/test/CodeGen/DirectX/floor.ll
+++ b/llvm/test/CodeGen/DirectX/floor.ll
@@ -2,19 +2,38 @@
; Make sure dxil operation function calls for floor are generated for float and half.
-define noundef float @floor_float(float noundef %a) #0 {
+define noundef float @floor_float(float noundef %a) {
entry:
; CHECK:call float @dx.op.unary.f32(i32 27, float %{{.*}})
%elt.floor = call float @llvm.floor.f32(float %a)
ret float %elt.floor
}
-define noundef half @floor_half(half noundef %a) #0 {
+define noundef half @floor_half(half noundef %a) {
entry:
; CHECK:call half @dx.op.unary.f16(i32 27, half %{{.*}})
%elt.floor = call half @llvm.floor.f16(half %a)
ret half %elt.floor
}
+define noundef <4 x float> @floor_float4(<4 x float> noundef %a) {
+entry:
+ ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
+ ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 27, float [[ee0]])
+ ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
+ ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 27, float [[ee1]])
+ ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
+ ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 27, float [[ee2]])
+ ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
+ ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 27, float [[ee3]])
+ ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3
+ %2 = call <4 x float> @llvm.floor.v4f32(<4 x float> %a)
+ ret <4 x float> %2
+}
+
declare half @llvm.floor.f16(half)
declare float @llvm.floor.f32(float)
+declare <4 x float> @llvm.floor.v4f32(<4 x float>)
diff --git a/llvm/test/CodeGen/DirectX/isinf.ll b/llvm/test/CodeGen/DirectX/isinf.ll
index 295776b0893476..03a00c40498d5a 100644
--- a/llvm/test/CodeGen/DirectX/isinf.ll
+++ b/llvm/test/CodeGen/DirectX/isinf.ll
@@ -1,25 +1,21 @@
; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
; Make sure dxil operation function calls for isinf are generated for float and half.
-; CHECK: call i1 @dx.op.isSpecialFloat.f32(i32 9, float %{{.*}})
-; CHECK: call i1 @dx.op.isSpecialFloat.f16(i32 9, half %{{.*}})
-; Function Attrs: noinline nounwind optnone
-define noundef i1 @isinf_float(float noundef %a) #0 {
+define noundef i1 @isinf_float(float noundef %a) {
entry:
- %a.addr = alloca float, align 4
- store float %a, ptr %a.addr, align 4
- %0 = load float, ptr %a.addr, align 4
- %dx.isinf = call i1 @llvm.dx.isinf.f32(float %0)
+ ; CHECK: call i1 @dx.op.isSpecialFloat.f32(i32 9, float %{{.*}})
+ %dx.isinf = call i1 @llvm.dx.isinf.f32(float %a)
ret i1 %dx.isinf
}
-; Function Attrs: noinline nounwind optnone
-define noundef i1 @isinf_half(half noundef %p0) #0 {
+define noundef i1 @isinf_half(half noundef %a) {
entry:
- %p0.addr = alloca half, align 2
- store half %p0, ptr %p0.addr, align 2
- %0 = load half, ptr %p0.addr, align 2
- %dx.isinf = call i1 @llvm.dx.isinf.f16(half %0)
+ ; CHECK: call i1 @dx.op.isSpecialFloat.f16(i32 9, half %{{.*}})
+ %dx.isinf = call i1 @llvm.dx.isinf.f16(half %a)
ret i1 %dx.isinf
}
+
+
+declare i1 @llvm.dx.isinf.f16(half)
+declare i1 @llvm.dx.isinf.f32(float)
diff --git a/llvm/test/CodeGen/DirectX/reversebits.ll b/llvm/test/CodeGen/DirectX/reversebits.ll
index 1ade57b40100ff..b5530d0850e663 100644
--- a/llvm/test/CodeGen/DirectX/reversebits.ll
+++ b/llvm/test/CodeGen/DirectX/reversebits.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
; Make sure dxil operation function calls for reversebits are generated for all integer types.
@@ -26,6 +26,25 @@ entry:
ret i64 %elt.bitreverse
}
+define noundef <4 x i32> @reversebits_int324(<4 x i32> noundef %a) #0 {
+entry:
+ ; CHECK: [[ee0:%.*]] = extractelement <4 x i32> %a, i64 0
+ ; CHECK: [[ie0:%.*]] = call i32 @dx.op.unary.i32(i32 30, i32 [[ee0]])
+ ; CHECK: [[ee1:%.*]] = extractelement <4 x i32> %a, i64 1
+ ; CHECK: [[ie1:%.*]] = call i32 @dx.op.unary.i32(i32 30, i32 [[ee1]])
+ ; CHECK: [[ee2:%.*]] = extractelement <4 x i32> %a, i64 2
+ ; CHECK: [[ie2:%.*]] = call i32 @dx.op.unary.i32(i32 30, i32 [[ee2]])
+ ; CHECK: [[ee3:%.*]] = extractelement <4 x i32> %a, i64 3
+ ; CHECK: [[ie3:%.*]] = call i32 @dx.op.unary.i32(i32 30, i32 [[ee3]])
+ ; CHECK: insertelement <4 x i32> poison, i32 [[ie0]], i64 0
+ ; CHECK: insertelement <4 x i32> %{{.*}}, i32 [[ie1]], i64 1
+ ; CHECK: insertelement <4 x i32> %{{.*}}, i32 [[ie2]], i64 2
+ ; CHECK: insertelement <4 x i32> %{{.*}}, i32 [[ie3]], i64 3
+ %2 = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> %a)
+ ret <4 x i32> %2
+}
+
declare i16 @llvm.bitreverse.i16(i16)
declare i32 @llvm.bitreverse.i32(i32)
declare i64 @llvm.bitreverse.i64(i64)
+declare <4 x i32> @llvm.bitreverse.v4i32(<4 x i32>)
diff --git a/llvm/test/CodeGen/DirectX/round.ll b/llvm/test/CodeGen/DirectX/round.ll
index db953fb29c2046..b08cbac5f42e91 100644
--- a/llvm/test/CodeGen/DirectX/round.ll
+++ b/llvm/test/CodeGen/DirectX/round.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
; Make sure dxil operation function calls for round are generated for float and half.
@@ -18,5 +18,25 @@ entry:
ret float %elt.roundeven
}
+define noundef <4 x float> @round_float4(<4 x float> noundef %a) #0 {
+entry:
+ ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
+ ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 26, float [[ee0]])
+ ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
+ ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 26, float [[ee1]])
+ ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
+ ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 26, float [[ee2]])
+ ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
+ ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 26, float [[ee3]])
+ ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3
+ %2 = call <4 x float> @llvm.roundeven.v4f32(<4 x float> %a)
+ ret <4 x float> %2
+}
+
+
declare half @llvm.roundeven.f16(half)
declare float @llvm.roundeven.f32(float)
+declare <4 x float> @llvm.roundeven.v4f32(<4 x float>)
diff --git a/llvm/test/CodeGen/DirectX/saturate.ll b/llvm/test/CodeGen/DirectX/saturate.ll
index a8557351756f2b..404cab7b665d0e 100644
--- a/llvm/test/CodeGen/DirectX/saturate.ll
+++ b/llvm/test/CodeGen/DirectX/saturate.ll
@@ -2,7 +2,7 @@
; Make sure the intrinsic dx.saturate is to appropriate DXIL op for half/float/double data types.
; CHECK-LABEL: test_saturate_half
-define noundef half @test_saturate_half(half noundef %p0) #0 {
+define noundef half @test_saturate_half(half noundef %p0) {
entry:
; CHECK: call half @dx.op.unary.f16(i32 7, half %p0)
%hlsl.saturate = call half @llvm.dx.saturate.f16(half %p0)
@@ -10,11 +10,8 @@ entry:
ret half %hlsl.saturate
}
-; Function Attrs: nocallback nofree nosync nounwind willreturn
-declare half @llvm.dx.saturate.f16(half) #1
-
; CHECK-LABEL: test_saturate_float
-define noundef float @test_saturate_float(float noundef %p0) #0 {
+define noundef float @test_saturate_float(float noundef %p0) {
entry:
; CHECK: call float @dx.op.unary.f32(i32 7, float %p0)
%hlsl.saturate = call float @llvm.dx.saturate.f32(float %p0)
@@ -22,11 +19,8 @@ entry:
ret float %hlsl.saturate
}
-; Function Attrs: nocallback nofree nosync nounwind willreturn
-declare float @llvm.dx.saturate.f32(float) #1
-
; CHECK-LABEL: test_saturate_double
-define noundef double @test_saturate_double(double noundef %p0) #0 {
+define noundef double @test_saturate_double(double noundef %p0) {
entry:
; CHECK: call double @dx.op.unary.f64(i32 7, double %p0)
%hlsl.saturate = call double @llvm.dx.saturate.f64(double %p0)
@@ -34,6 +28,7 @@ entry:
ret double %hlsl.saturate
}
-; Function Attrs: nocallback nofree nosync nounwind willreturn
-declare double @llvm.dx.saturate.f64(double) #1
+declare half @llvm.dx.saturate.f16(half)
+declare float @llvm.dx.saturate.f32(float)
+declare double @llvm.dx.saturate.f64(double)
diff --git a/llvm/test/CodeGen/DirectX/scalar-store.ll b/llvm/test/CodeGen/DirectX/scalar-store.ll
new file mode 100644
index 00000000000000..b970a2842e5a8b
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/scalar-store.ll
@@ -0,0 +1,17 @@
+; RUN: opt -S -scalarizer -scalarize-load-store -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+; RUN: llc %s -mtriple=dxil-pc-shadermodel6.3-library --filetype=asm -o - | FileCheck %s
+
+@"sharedData" = local_unnamed_addr addrspace(3) global [2 x <3 x float>] zeroinitializer, align 16
+; CHECK-LABEL: store_test
+define void @store_test () local_unnamed_addr {
+ ; CHECK: store float 1.000000e+00, ptr addrspace(3) {{.*}}, align {{.*}}
+ ; CHECK: store float 2.000000e+00, ptr addrspace(3) {{.*}}, align {{.*}}
+ ; CHECK: store float 3.000000e+00, ptr addrspace(3) {{.*}}, align {{.*}}
+ ; CHECK: store float 2.000000e+00, ptr addrspace(3) {{.*}}, align {{.*}}
+ ; CHECK: store float 4.000000e+00, ptr addrspace(3) {{.*}}, align {{.*}}
+ ; CHECK: store float 6.000000e+00, ptr addrspace(3) {{.*}}, align {{.*}}
+
+ store <3 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, ptr addrspace(3) @"sharedData", align 16
+ store <3 x float> <float 2.000000e+00, float 4.000000e+00, float 6.000000e+00>, ptr addrspace(3) getelementptr inbounds (i8, ptr addrspace(3) @"sharedData", i32 16), align 16
+ ret void
+ }
diff --git a/llvm/test/CodeGen/DirectX/scalarization_pass_order.ll b/llvm/test/CodeGen/DirectX/scalarization_pass_order.ll
new file mode 100644
index 00000000000000..f33c2a7ccdcef2
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/scalarization_pass_order.ll
@@ -0,0 +1,45 @@
+; RUN: llc -mtriple=dxil-pc-shadermodel6.3-library -debug-pass=Structure < %s -o /dev/null 2>&1 | \
+; RUN: grep -v "Verify generated machine code" | FileCheck %s
+; RUN: llc %s -mtriple=dxil-pc-shadermodel6.3-library --filetype=asm -o - | FileCheck %s --check-prefixes=CHECKIR
+; CHECK-LABEL: Pass Arguments:
+; CHECK-NEXT: Target Library Information
+; CHECK-NEXT: ModulePass Manager
+; CHECK-NEXT: DXIL Intrinsic Expansion
+; CHECK-NEXT: FunctionPass Manager
+; CHECK-NEXT: Dominator Tree Construction
+; CHECK-NEXT: Scalarize vector operations
+; CHECK-NEXT: DXIL Intrinsic Expansion
+; CHECK-NEXT: DXIL Resource analysis
+; CHECK-NEXT: DXIL Op Lowering
+; CHECK-NEXT: DXIL Finalize Linkage
+; CHECK-NEXT: DXIL Resource analysis
+; CHECK-NEXT: DXIL resource Information
+; CHECK-NEXT: DXIL Shader Flag Analysis
+; CHECK-NEXT: DXIL Translate Metadata
+; CHECK-NEXT: DXIL Prepare Module
+; CHECK-NEXT: DXIL Resource analysis
+; CHECK-NEXT: DXIL Metadata Pretty Printer
+; CHECK-NEXT: Print Module IR
+; CHECKIR: target triple = "dxilv1.3-pc-shadermodel6.3-library"
+; CHECKIR-LABEL: cos_sin_float_test
+define noundef <4 x float> @cos_sin_float_test(<4 x float> noundef %a) {
+ ; CHECKIR: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
+ ; CHECKIR: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 13, float [[ee0]])
+ ; CHECKIR: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
+ ; CHECKIR: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 13, float [[ee1]])
+ ; CHECKIR: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
+ ; CHECKIR: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 13, float [[ee2]])
+ ; CHECKIR: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
+ ; CHECKIR: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 13, float [[ee3]])
+ ; CHECKIR: [[ie4:%.*]] = call float @dx.op.unary.f32(i32 12, float [[ie0]])
+ ; CHECKIR: [[ie5:%.*]] = call float @dx.op.unary.f32(i32 12, float [[ie1]])
+ ; CHECKIR: [[ie6:%.*]] = call float @dx.op.unary.f32(i32 12, float [[ie2]])
+ ; CHECKIR: [[ie7:%.*]] = call float @dx.op.unary.f32(i32 12, float [[ie3]])
+ ; CHECKIR: insertelement <4 x float> poison, float [[ie4]], i64 0
+ ; CHECKIR: insertelement <4 x float> %{{.*}}, float [[ie5]], i64 1
+ ; CHECKIR: insertelement <4 x float> %{{.*}}, float [[ie6]], i64 2
+ ; CHECKIR: insertelement <4 x float> %{{.*}}, float [[ie7]], i64 3
+ %2 = tail call <4 x float> @llvm.sin.v4f32(<4 x float> %a)
+ %3 = tail call <4 x float> @llvm.cos.v4f32(<4 x float> %2)
+ ret <4 x float> %3
+}
diff --git a/llvm/test/CodeGen/DirectX/sin.ll b/llvm/test/CodeGen/DirectX/sin.ll
index 79143bfa0a5298..a0b0d2675e3b75 100644
--- a/llvm/test/CodeGen/DirectX/sin.ll
+++ b/llvm/test/CodeGen/DirectX/sin.ll
@@ -1,25 +1,39 @@
-; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
; Make sure dxil operation function calls for sin are generated for float and half.
-; CHECK:call float @dx.op.unary.f32(i32 13, float %{{.*}})
-; CHECK:call half @dx.op.unary.f16(i32 13, half %{{.*}})
-; Function Attrs: noinline nounwind optnone
-define noundef float @sin_float(float noundef %a) #0 {
+define noundef float @sin_float(float noundef %a) {
entry:
+ ; CHECK:call float @dx.op.unary.f32(i32 13, float %{{.*}})
%1 = call float @llvm.sin.f32(float %a)
ret float %1
}
-; Function Attrs: noinline nounwind optnone
-define noundef half @sin_half(half noundef %a) #0 {
+define noundef half @sin_half(half noundef %a) {
entry:
+ ; CHECK:call half @dx.op.unary.f16(i32 13, half %{{.*}})
%1 = call half @llvm.sin.f16(half %a)
ret half %1
}
-define noundef <4 x float> @sin_float4(<4 x float> noundef %a) #0 {
+define noundef <4 x float> @sin_float4(<4 x float> noundef %a) {
entry:
+ ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
+ ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 13, float [[ee0]])
+ ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
+ ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 13, float [[ee1]])
+ ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
+ ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 13, float [[ee2]])
+ ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
+ ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 13, float [[ee3]])
+ ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3
%2 = call <4 x float> @llvm.sin.v4f32(<4 x float> %a)
ret <4 x float> %2
}
+
+declare half @llvm.sin.f16(half)
+declare float @llvm.sin.f32(float)
+declare <4 x float> @llvm.sin.v4f32(<4 x float>)
\ No newline at end of file
diff --git a/llvm/test/CodeGen/DirectX/sinh.ll b/llvm/test/CodeGen/DirectX/sinh.ll
index d4d3eda9eccb6c..deba726e8d9adc 100644
--- a/llvm/test/CodeGen/DirectX/sinh.ll
+++ b/llvm/test/CodeGen/DirectX/sinh.ll
@@ -1,20 +1,39 @@
-; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
; Make sure dxil operation function calls for sinh are generated for float and half.
-define noundef float @tan_float(float noundef %a) {
+define noundef float @sinh_float(float noundef %a) {
entry:
; CHECK:call float @dx.op.unary.f32(i32 19, float %{{.*}})
%elt.sinh = call float @llvm.sinh.f32(float %a)
ret float %elt.sinh
}
-define noundef half @tan_half(half noundef %a) {
+define noundef half @sinh_half(half noundef %a) {
entry:
; CHECK:call half @dx.op.unary.f16(i32 19, half %{{.*}})
%elt.sinh = call half @llvm.sinh.f16(half %a)
ret half %elt.sinh
}
+define noundef <4 x float> @sinh_float4(<4 x float> noundef %a) {
+entry:
+ ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
+ ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 19, float [[ee0]])
+ ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
+ ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 19, float [[ee1]])
+ ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
+ ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 19, float [[ee2]])
+ ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
+ ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 19, float [[ee3]])
+ ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3
+ %2 = call <4 x float> @llvm.sinh.v4f32(<4 x float> %a)
+ ret <4 x float> %2
+}
+
declare half @llvm.sinh.f16(half)
declare float @llvm.sinh.f32(float)
+declare <4 x float> @llvm.sinh.v4f32(<4 x float>)
diff --git a/llvm/test/CodeGen/DirectX/sqrt.ll b/llvm/test/CodeGen/DirectX/sqrt.ll
index 792fbc8d0614d3..e2955b4efa2ec4 100644
--- a/llvm/test/CodeGen/DirectX/sqrt.ll
+++ b/llvm/test/CodeGen/DirectX/sqrt.ll
@@ -1,20 +1,39 @@
-; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
; Make sure dxil operation function calls for sqrt are generated for float and half.
-define noundef float @sqrt_float(float noundef %a) #0 {
+define noundef float @sqrt_float(float noundef %a) {
entry:
; CHECK:call float @dx.op.unary.f32(i32 24, float %{{.*}})
%elt.sqrt = call float @llvm.sqrt.f32(float %a)
ret float %elt.sqrt
}
-define noundef half @sqrt_half(half noundef %a) #0 {
+define noundef half @sqrt_half(half noundef %a) {
entry:
; CHECK:call half @dx.op.unary.f16(i32 24, half %{{.*}})
%elt.sqrt = call half @llvm.sqrt.f16(half %a)
ret half %elt.sqrt
}
+define noundef <4 x float> @sqrt_float4(<4 x float> noundef %a) {
+entry:
+ ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
+ ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 24, float [[ee0]])
+ ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
+ ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 24, float [[ee1]])
+ ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
+ ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 24, float [[ee2]])
+ ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
+ ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 24, float [[ee3]])
+ ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3
+ %2 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a)
+ ret <4 x float> %2
+}
+
declare half @llvm.sqrt.f16(half)
declare float @llvm.sqrt.f32(float)
+declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
diff --git a/llvm/test/CodeGen/DirectX/tan.ll b/llvm/test/CodeGen/DirectX/tan.ll
index 6f7beb592339a9..cf6965a95c04e1 100644
--- a/llvm/test/CodeGen/DirectX/tan.ll
+++ b/llvm/test/CodeGen/DirectX/tan.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
; Make sure dxil operation function calls for tan are generated for float and half.
@@ -16,5 +16,24 @@ entry:
ret half %elt.tan
}
+define noundef <4 x float> @tan_float4(<4 x float> noundef %a) #0 {
+entry:
+ ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
+ ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 14, float [[ee0]])
+ ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
+ ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 14, float [[ee1]])
+ ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
+ ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 14, float [[ee2]])
+ ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
+ ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 14, float [[ee3]])
+ ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3
+ %2 = call <4 x float> @llvm.tan.v4f32(<4 x float> %a)
+ ret <4 x float> %2
+}
+
declare half @llvm.tan.f16(half)
declare float @llvm.tan.f32(float)
+declare <4 x float> @llvm.tan.v4f32(<4 x float>)
diff --git a/llvm/test/CodeGen/DirectX/tanh.ll b/llvm/test/CodeGen/DirectX/tanh.ll
index e6642d9a74c8a3..54ec6f29fa0c3c 100644
--- a/llvm/test/CodeGen/DirectX/tanh.ll
+++ b/llvm/test/CodeGen/DirectX/tanh.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
; Make sure dxil operation function calls for tanh are generated for float and half.
@@ -16,5 +16,24 @@ entry:
ret half %elt.tanh
}
+define noundef <4 x float> @tanh_float4(<4 x float> noundef %a) #0 {
+entry:
+ ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
+ ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 20, float [[ee0]])
+ ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
+ ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 20, float [[ee1]])
+ ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
+ ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 20, float [[ee2]])
+ ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
+ ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 20, float [[ee3]])
+ ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3
+ %2 = call <4 x float> @llvm.tanh.v4f32(<4 x float> %a)
+ ret <4 x float> %2
+}
+
declare half @llvm.tanh.f16(half)
declare float @llvm.tanh.f32(float)
+declare <4 x float> @llvm.tanh.v4f32(<4 x float>)
diff --git a/llvm/test/CodeGen/DirectX/trunc.ll b/llvm/test/CodeGen/DirectX/trunc.ll
index f00b737da4dbb3..6d9c222595c448 100644
--- a/llvm/test/CodeGen/DirectX/trunc.ll
+++ b/llvm/test/CodeGen/DirectX/trunc.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
; Make sure dxil operation function calls for trunc are generated for float and half.
@@ -16,5 +16,24 @@ entry:
ret half %elt.trunc
}
+define noundef <4 x float> @trunc_float4(<4 x float> noundef %a) #0 {
+entry:
+ ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
+ ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 29, float [[ee0]])
+ ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
+ ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 29, float [[ee1]])
+ ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
+ ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 29, float [[ee2]])
+ ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
+ ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 29, float [[ee3]])
+ ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3
+ %2 = call <4 x float> @llvm.trunc.v4f32(<4 x float> %a)
+ ret <4 x float> %2
+}
+
declare half @llvm.trunc.f16(half)
declare float @llvm.trunc.f32(float)
+declare <4 x float> @llvm.trunc.v4f32(<4 x float>)
diff --git a/llvm/tools/opt/optdriver.cpp b/llvm/tools/opt/optdriver.cpp
index 1bdfa71830ba22..c5bc7b43e03314 100644
--- a/llvm/tools/opt/optdriver.cpp
+++ b/llvm/tools/opt/optdriver.cpp
@@ -375,6 +375,7 @@ static bool shouldPinPassToLegacyPM(StringRef Pass) {
"fix-irreducible",
"expand-large-fp-convert",
"callbrprepare",
+ "scalarizer",
};
for (const auto &P : PassNamePrefix)
if (Pass.starts_with(P))
>From 070ee3688125e92d150707af0341ac6df47871d8 Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <farzon at farzon.org>
Date: Fri, 6 Sep 2024 13:42:24 -0400
Subject: [PATCH 3/3] address pr comments
---
.../Target/DirectX/DirectXTargetMachine.cpp | 1 -
llvm/test/CodeGen/DirectX/llc-pipeline.ll | 25 +++++++++++
.../DirectX/scalarization_pass_order.ll | 45 -------------------
.../CodeGen/DirectX/scalarize-two-calls.ll | 25 +++++++++++
llvm/test/CodeGen/DirectX/sin.ll | 2 +-
5 files changed, 51 insertions(+), 47 deletions(-)
create mode 100644 llvm/test/CodeGen/DirectX/llc-pipeline.ll
delete mode 100644 llvm/test/CodeGen/DirectX/scalarization_pass_order.ll
create mode 100644 llvm/test/CodeGen/DirectX/scalarize-two-calls.ll
diff --git a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
index f021e24ac7e26e..606022a9835f04 100644
--- a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
+++ b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
@@ -87,7 +87,6 @@ class DirectXPassConfig : public TargetPassConfig {
void addCodeGenPrepare() override {
addPass(createDXILIntrinsicExpansionLegacyPass());
ScalarizerPassOptions DxilScalarOptions;
- // The only non-default option we need to set is ScalarizeLoadStore.
DxilScalarOptions.ScalarizeLoadStore = true;
addPass(createScalarizerPass(DxilScalarOptions));
addPass(createDXILOpLoweringLegacyPass());
diff --git a/llvm/test/CodeGen/DirectX/llc-pipeline.ll b/llvm/test/CodeGen/DirectX/llc-pipeline.ll
new file mode 100644
index 00000000000000..36610bef719bf0
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/llc-pipeline.ll
@@ -0,0 +1,25 @@
+; RUN: llc -mtriple=dxil-pc-shadermodel6.3-library -debug-pass=Structure < %s -o /dev/null 2>&1 | \
+; RUN: grep -v "Verify generated machine code" | FileCheck %s
+
+; REQUIRES: asserts
+
+; CHECK-LABEL: Pass Arguments:
+; CHECK-NEXT: Target Library Information
+; CHECK-NEXT: ModulePass Manager
+; CHECK-NEXT: DXIL Intrinsic Expansion
+; CHECK-NEXT: FunctionPass Manager
+; CHECK-NEXT: Dominator Tree Construction
+; CHECK-NEXT: Scalarize vector operations
+; CHECK-NEXT: DXIL Intrinsic Expansion
+; CHECK-NEXT: DXIL Resource analysis
+; CHECK-NEXT: DXIL Op Lowering
+; CHECK-NEXT: DXIL Finalize Linkage
+; CHECK-NEXT: DXIL Resource analysis
+; CHECK-NEXT: DXIL resource Information
+; CHECK-NEXT: DXIL Shader Flag Analysis
+; CHECK-NEXT: DXIL Translate Metadata
+; CHECK-NEXT: DXIL Prepare Module
+; CHECK-NEXT: DXIL Resource analysis
+; CHECK-NEXT: DXIL Metadata Pretty Printer
+; CHECK-NEXT: Print Module IR
+
diff --git a/llvm/test/CodeGen/DirectX/scalarization_pass_order.ll b/llvm/test/CodeGen/DirectX/scalarization_pass_order.ll
deleted file mode 100644
index f33c2a7ccdcef2..00000000000000
--- a/llvm/test/CodeGen/DirectX/scalarization_pass_order.ll
+++ /dev/null
@@ -1,45 +0,0 @@
-; RUN: llc -mtriple=dxil-pc-shadermodel6.3-library -debug-pass=Structure < %s -o /dev/null 2>&1 | \
-; RUN: grep -v "Verify generated machine code" | FileCheck %s
-; RUN: llc %s -mtriple=dxil-pc-shadermodel6.3-library --filetype=asm -o - | FileCheck %s --check-prefixes=CHECKIR
-; CHECK-LABEL: Pass Arguments:
-; CHECK-NEXT: Target Library Information
-; CHECK-NEXT: ModulePass Manager
-; CHECK-NEXT: DXIL Intrinsic Expansion
-; CHECK-NEXT: FunctionPass Manager
-; CHECK-NEXT: Dominator Tree Construction
-; CHECK-NEXT: Scalarize vector operations
-; CHECK-NEXT: DXIL Intrinsic Expansion
-; CHECK-NEXT: DXIL Resource analysis
-; CHECK-NEXT: DXIL Op Lowering
-; CHECK-NEXT: DXIL Finalize Linkage
-; CHECK-NEXT: DXIL Resource analysis
-; CHECK-NEXT: DXIL resource Information
-; CHECK-NEXT: DXIL Shader Flag Analysis
-; CHECK-NEXT: DXIL Translate Metadata
-; CHECK-NEXT: DXIL Prepare Module
-; CHECK-NEXT: DXIL Resource analysis
-; CHECK-NEXT: DXIL Metadata Pretty Printer
-; CHECK-NEXT: Print Module IR
-; CHECKIR: target triple = "dxilv1.3-pc-shadermodel6.3-library"
-; CHECKIR-LABEL: cos_sin_float_test
-define noundef <4 x float> @cos_sin_float_test(<4 x float> noundef %a) {
- ; CHECKIR: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
- ; CHECKIR: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 13, float [[ee0]])
- ; CHECKIR: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
- ; CHECKIR: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 13, float [[ee1]])
- ; CHECKIR: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
- ; CHECKIR: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 13, float [[ee2]])
- ; CHECKIR: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
- ; CHECKIR: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 13, float [[ee3]])
- ; CHECKIR: [[ie4:%.*]] = call float @dx.op.unary.f32(i32 12, float [[ie0]])
- ; CHECKIR: [[ie5:%.*]] = call float @dx.op.unary.f32(i32 12, float [[ie1]])
- ; CHECKIR: [[ie6:%.*]] = call float @dx.op.unary.f32(i32 12, float [[ie2]])
- ; CHECKIR: [[ie7:%.*]] = call float @dx.op.unary.f32(i32 12, float [[ie3]])
- ; CHECKIR: insertelement <4 x float> poison, float [[ie4]], i64 0
- ; CHECKIR: insertelement <4 x float> %{{.*}}, float [[ie5]], i64 1
- ; CHECKIR: insertelement <4 x float> %{{.*}}, float [[ie6]], i64 2
- ; CHECKIR: insertelement <4 x float> %{{.*}}, float [[ie7]], i64 3
- %2 = tail call <4 x float> @llvm.sin.v4f32(<4 x float> %a)
- %3 = tail call <4 x float> @llvm.cos.v4f32(<4 x float> %2)
- ret <4 x float> %3
-}
diff --git a/llvm/test/CodeGen/DirectX/scalarize-two-calls.ll b/llvm/test/CodeGen/DirectX/scalarize-two-calls.ll
new file mode 100644
index 00000000000000..a14c1de5cc4205
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/scalarize-two-calls.ll
@@ -0,0 +1,25 @@
+; RUN: llc %s -mtriple=dxil-pc-shadermodel6.3-library --filetype=asm -o - | FileCheck %s
+
+; CHECK: target triple = "dxilv1.3-pc-shadermodel6.3-library"
+; CHECK-LABEL: cos_sin_float_test
+define noundef <4 x float> @cos_sin_float_test(<4 x float> noundef %a) { ; test input: computes cos(sin(%a)) on a <4 x float>
+ ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
+ ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 13, float [[ee0]])
+ ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
+ ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 13, float [[ee1]])
+ ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
+ ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 13, float [[ee2]])
+ ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
+ ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 13, float [[ee3]])
+ ; CHECK: [[ie4:%.*]] = call float @dx.op.unary.f32(i32 12, float [[ie0]])
+ ; CHECK: [[ie5:%.*]] = call float @dx.op.unary.f32(i32 12, float [[ie1]])
+ ; CHECK: [[ie6:%.*]] = call float @dx.op.unary.f32(i32 12, float [[ie2]])
+ ; CHECK: [[ie7:%.*]] = call float @dx.op.unary.f32(i32 12, float [[ie3]])
+ ; CHECK: insertelement <4 x float> poison, float [[ie4]], i64 0
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie5]], i64 1
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie6]], i64 2
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie7]], i64 3
+ %2 = tail call <4 x float> @llvm.sin.v4f32(<4 x float> %a) ; vector sin of %a; the expectations above want one scalar dx.op.unary.f32 call (opcode 13) per extracted lane
+ %3 = tail call <4 x float> @llvm.cos.v4f32(<4 x float> %2) ; vector cos of the sin result; expected as four scalar calls (opcode 12) fed directly by the opcode-13 results
+ ret <4 x float> %3 ; the expectations above want the result rebuilt lane by lane with insertelement, starting from poison
+}
diff --git a/llvm/test/CodeGen/DirectX/sin.ll b/llvm/test/CodeGen/DirectX/sin.ll
index a0b0d2675e3b75..ac6b217be80e75 100644
--- a/llvm/test/CodeGen/DirectX/sin.ll
+++ b/llvm/test/CodeGen/DirectX/sin.ll
@@ -36,4 +36,4 @@ entry:
declare half @llvm.sin.f16(half)
declare float @llvm.sin.f32(float)
-declare <4 x float> @llvm.sin.v4f32(<4 x float>)
\ No newline at end of file
+declare <4 x float> @llvm.sin.v4f32(<4 x float>)
More information about the llvm-commits
mailing list