[llvm] [LegacyPM][DirectX] Add legacy scalarizer back for use in the DirectX backend (PR #107427)

Wed Sep 11 17:09:48 PDT 2024

llvmbot wrote:



@llvm/pr-subscribers-backend-directx

@llvm/pr-subscribers-llvm-transforms

Author: Farzon Lotfi (farzonl)

<details>
<summary>Changes</summary>

As discussed in this [proposal](https://github.com/llvm/wg-hlsl/pull/62/files?short_path=ac6e592#diff-ac6e59276afe8016e307eedc5c835f534c0cb353707760b44df0fa9d905a5cf8). We had to bring back the legacy pass manager interface for the scalarizer pass. Two reasons for this:
1. The DirectX backend is still using the legacy pass manager
2. The new PM isn't hooked up in clang yet via `BackendUtil.cpp`'s `AddEmitPasses` That means even if we add a `buildCodeGenPipeline`  we won't be able to benefit from the new pass manager's scalarizer pass interface.

The remaining changes are hooking up the scalarizer pass to the DirectX backend, updating the DirectX test cases,
and allowing the `optdriver` to not block the legacy invocation of the scalarizer pass.

Future work still needs to be done to allow the scalarizer  pass to handle target specific intrinsics.

closes #105178

---

Patch is 47.97 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/107427.diff


29 Files Affected:

- (modified) llvm/include/llvm/InitializePasses.h (+1) 
- (modified) llvm/include/llvm/LinkAllPasses.h (+1) 
- (modified) llvm/include/llvm/Transforms/Scalar/Scalarizer.h (+5) 
- (modified) llvm/lib/Target/DirectX/DirectXTargetMachine.cpp (+6) 
- (modified) llvm/lib/Transforms/Scalar/Scalar.cpp (+1) 
- (modified) llvm/lib/Transforms/Scalar/Scalarizer.cpp (+39) 
- (modified) llvm/test/CodeGen/DirectX/acos.ll (+22-3) 
- (modified) llvm/test/CodeGen/DirectX/asin.ll (+22-3) 
- (modified) llvm/test/CodeGen/DirectX/atan.ll (+22-3) 
- (modified) llvm/test/CodeGen/DirectX/ceil.ll (+20-1) 
- (modified) llvm/test/CodeGen/DirectX/cos.ll (+20-1) 
- (modified) llvm/test/CodeGen/DirectX/cosh.ll (+22-3) 
- (modified) llvm/test/CodeGen/DirectX/exp2.ll (+29-21) 
- (modified) llvm/test/CodeGen/DirectX/fabs.ll (+21-1) 
- (modified) llvm/test/CodeGen/DirectX/floor.ll (+21-2) 
- (modified) llvm/test/CodeGen/DirectX/isinf.ll (+10-14) 
- (added) llvm/test/CodeGen/DirectX/llc-pipeline.ll (+25) 
- (modified) llvm/test/CodeGen/DirectX/reversebits.ll (+20-1) 
- (modified) llvm/test/CodeGen/DirectX/round.ll (+21-1) 
- (modified) llvm/test/CodeGen/DirectX/saturate.ll (+6-11) 
- (added) llvm/test/CodeGen/DirectX/scalar-store.ll (+17) 
- (added) llvm/test/CodeGen/DirectX/scalarize-two-calls.ll (+25) 
- (modified) llvm/test/CodeGen/DirectX/sin.ll (+29-15) 
- (modified) llvm/test/CodeGen/DirectX/sinh.ll (+22-3) 
- (modified) llvm/test/CodeGen/DirectX/sqrt.ll (+22-3) 
- (modified) llvm/test/CodeGen/DirectX/tan.ll (+20-1) 
- (modified) llvm/test/CodeGen/DirectX/tanh.ll (+20-1) 
- (modified) llvm/test/CodeGen/DirectX/trunc.ll (+20-1) 
- (modified) llvm/tools/opt/optdriver.cpp (+1) 


``````````diff

diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index 6605c6fde92510..4352099d6dbb99 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -276,6 +276,7 @@ void initializeSafepointIRVerifierPass(PassRegistry &);
 void initializeSelectOptimizePass(PassRegistry &);
 void initializeScalarEvolutionWrapperPassPass(PassRegistry &);
 void initializeScalarizeMaskedMemIntrinLegacyPassPass(PassRegistry &);
+void initializeScalarizerLegacyPassPass(PassRegistry &);
 void initializeScavengerTestPass(PassRegistry &);
 void initializeScopedNoAliasAAWrapperPassPass(PassRegistry &);
 void initializeSeparateConstOffsetFromGEPLegacyPassPass(PassRegistry &);
diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h
index 1da02153d846f1..92b59a66567c95 100644
--- a/llvm/include/llvm/LinkAllPasses.h
+++ b/llvm/include/llvm/LinkAllPasses.h
@@ -130,6 +130,7 @@ struct ForcePassLinking {
     (void)llvm::createLowerAtomicPass();
     (void)llvm::createLoadStoreVectorizerPass();
     (void)llvm::createPartiallyInlineLibCallsPass();
+    (void)llvm::createScalarizerPass();
     (void)llvm::createSeparateConstOffsetFromGEPPass();
     (void)llvm::createSpeculativeExecutionPass();
     (void)llvm::createSpeculativeExecutionIfHasBranchDivergencePass();
diff --git a/llvm/include/llvm/Transforms/Scalar/Scalarizer.h b/llvm/include/llvm/Transforms/Scalar/Scalarizer.h
index 45e25cbf282149..4d2a1a2f889a3c 100644
--- a/llvm/include/llvm/Transforms/Scalar/Scalarizer.h
+++ b/llvm/include/llvm/Transforms/Scalar/Scalarizer.h
@@ -24,6 +24,7 @@
 namespace llvm {
 
 class Function;
+class FunctionPass;
 
 struct ScalarizerPassOptions {
   // These options correspond 1:1 to cl::opt options defined in
@@ -50,6 +51,10 @@ class ScalarizerPass : public PassInfoMixin<ScalarizerPass> {
   void setScalarizeLoadStore(bool Value) { Options.ScalarizeLoadStore = Value; }
   void setScalarizeMinBits(unsigned Value) { Options.ScalarizeMinBits = Value; }
 };
+
+/// Create a legacy pass manager instance of the Scalarizer pass
+FunctionPass *createScalarizerPass(
+    const ScalarizerPassOptions &Options = ScalarizerPassOptions());
 }
 
 #endif /* LLVM_TRANSFORMS_SCALAR_SCALARIZER_H */
diff --git a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
index a29fc210421637..606022a9835f04 100644
--- a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
+++ b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
@@ -28,6 +28,7 @@
 #include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/IR/IRPrintingPasses.h"
 #include "llvm/IR/LegacyPassManager.h"
+#include "llvm/InitializePasses.h"
 #include "llvm/MC/MCSectionDXContainer.h"
 #include "llvm/MC/SectionKind.h"
 #include "llvm/MC/TargetRegistry.h"
@@ -36,6 +37,7 @@
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Transforms/Scalar/Scalarizer.h"
 #include <optional>
 
 using namespace llvm;
@@ -44,6 +46,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeDirectXTarget() {
   RegisterTargetMachine<DirectXTargetMachine> X(getTheDirectXTarget());
   auto *PR = PassRegistry::getPassRegistry();
   initializeDXILIntrinsicExpansionLegacyPass(*PR);
+  initializeScalarizerLegacyPassPass(*PR);
   initializeDXILPrepareModulePass(*PR);
   initializeEmbedDXILPassPass(*PR);
   initializeWriteDXILPassPass(*PR);
@@ -83,6 +86,9 @@ class DirectXPassConfig : public TargetPassConfig {
   FunctionPass *createTargetRegisterAllocator(bool) override { return nullptr; }
   void addCodeGenPrepare() override {
     addPass(createDXILIntrinsicExpansionLegacyPass());
+    ScalarizerPassOptions DxilScalarOptions;
+    DxilScalarOptions.ScalarizeLoadStore = true;
+    addPass(createScalarizerPass(DxilScalarOptions));
     addPass(createDXILOpLoweringLegacyPass());
     addPass(createDXILFinalizeLinkageLegacyPass());
     addPass(createDXILTranslateMetadataLegacyPass());
diff --git a/llvm/lib/Transforms/Scalar/Scalar.cpp b/llvm/lib/Transforms/Scalar/Scalar.cpp
index 7aeee1d31f7e79..fa6e671830d962 100644
--- a/llvm/lib/Transforms/Scalar/Scalar.cpp
+++ b/llvm/lib/Transforms/Scalar/Scalar.cpp
@@ -21,6 +21,7 @@ using namespace llvm;
 void llvm::initializeScalarOpts(PassRegistry &Registry) {
   initializeConstantHoistingLegacyPassPass(Registry);
   initializeDCELegacyPassPass(Registry);
+  initializeScalarizerLegacyPassPass(Registry);
   initializeGVNLegacyPassPass(Registry);
   initializeEarlyCSELegacyPassPass(Registry);
   initializeEarlyCSEMemSSALegacyPassPass(Registry);
diff --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
index 2bed3480da1cda..01d24335df2262 100644
--- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
@@ -36,6 +36,7 @@
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Type.h"
 #include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Transforms/Utils/Local.h"
@@ -340,8 +341,33 @@ class ScalarizerVisitor : public InstVisitor<ScalarizerVisitor, bool> {
   const unsigned ScalarizeMinBits;
 };
 
+class ScalarizerLegacyPass : public FunctionPass {
+public:
+  static char ID;
+  ScalarizerPassOptions Options;
+  ScalarizerLegacyPass() : FunctionPass(ID), Options() {}
+  ScalarizerLegacyPass(const ScalarizerPassOptions &Options);
+  bool runOnFunction(Function &F) override;
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+};
+
 } // end anonymous namespace
 
+ScalarizerLegacyPass::ScalarizerLegacyPass(const ScalarizerPassOptions &Options)
+    : FunctionPass(ID), Options(Options) {}
+
+void ScalarizerLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequired<DominatorTreeWrapperPass>();
+  AU.addPreserved<DominatorTreeWrapperPass>();
+}
+
+char ScalarizerLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(ScalarizerLegacyPass, "scalarizer",
+                      "Scalarize vector operations", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_END(ScalarizerLegacyPass, "scalarizer",
+                    "Scalarize vector operations", false, false)
+
 Scatterer::Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
                      const VectorSplit &VS, ValueVector *cachePtr)
     : BB(bb), BBI(bbi), V(v), VS(VS), CachePtr(cachePtr) {
@@ -414,6 +440,19 @@ Value *Scatterer::operator[](unsigned Frag) {
   return CV[Frag];
 }
 
+bool ScalarizerLegacyPass::runOnFunction(Function &F) {
+  if (skipFunction(F))
+    return false;
+
+  DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+  ScalarizerVisitor Impl(DT, Options);
+  return Impl.visit(F);
+}
+
+FunctionPass *llvm::createScalarizerPass(const ScalarizerPassOptions &Options) {
+  return new ScalarizerLegacyPass(Options);
+}
+
 bool ScalarizerVisitor::visit(Function &F) {
   assert(Gathered.empty() && Scattered.empty());
 
diff --git a/llvm/test/CodeGen/DirectX/acos.ll b/llvm/test/CodeGen/DirectX/acos.ll
index cc32182395627c..f4a10eb368ebfb 100644
--- a/llvm/test/CodeGen/DirectX/acos.ll
+++ b/llvm/test/CodeGen/DirectX/acos.ll
@@ -1,20 +1,39 @@
-; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
 
 ; Make sure dxil operation function calls for acos are generated for float and half.
 
-define noundef float @tan_float(float noundef %a) {
+define noundef float @acos_float(float noundef %a) {
 entry:
 ; CHECK:call float @dx.op.unary.f32(i32 15, float %{{.*}})
   %elt.acos = call float @llvm.acos.f32(float %a)
   ret float %elt.acos
 }
 
-define noundef half @tan_half(half noundef %a) {
+define noundef half @acos_half(half noundef %a) {
 entry:
 ; CHECK:call half @dx.op.unary.f16(i32 15, half %{{.*}})
   %elt.acos = call half @llvm.acos.f16(half %a)
   ret half %elt.acos
 }
 
+define noundef <4 x float> @acos_float4(<4 x float> noundef %a) {
+entry:
+  ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
+  ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 15, float [[ee0]])
+  ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
+  ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 15, float [[ee1]])
+  ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
+  ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 15, float [[ee2]])
+  ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
+  ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 15, float [[ee3]])
+  ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
+  ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
+  ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
+  ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3
+  %2 = call <4 x float> @llvm.acos.v4f32(<4 x float> %a) 
+  ret <4 x float> %2
+}
+
 declare half @llvm.acos.f16(half)
 declare float @llvm.acos.f32(float)
+declare <4 x float> @llvm.acos.v4f32(<4 x float>)
diff --git a/llvm/test/CodeGen/DirectX/asin.ll b/llvm/test/CodeGen/DirectX/asin.ll
index 06e3bab545a6aa..bd948f593c24e2 100644
--- a/llvm/test/CodeGen/DirectX/asin.ll
+++ b/llvm/test/CodeGen/DirectX/asin.ll
@@ -1,20 +1,39 @@
-; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
 
 ; Make sure dxil operation function calls for asin are generated for float and half.
 
-define noundef float @tan_float(float noundef %a) {
+define noundef float @asin_float(float noundef %a) {
 entry:
 ; CHECK:call float @dx.op.unary.f32(i32 16, float %{{.*}})
   %elt.asin = call float @llvm.asin.f32(float %a)
   ret float %elt.asin
 }
 
-define noundef half @tan_half(half noundef %a) {
+define noundef half @asin_half(half noundef %a) {
 entry:
 ; CHECK:call half @dx.op.unary.f16(i32 16, half %{{.*}})
   %elt.asin = call half @llvm.asin.f16(half %a)
   ret half %elt.asin
 }
 
+define noundef <4 x float> @asin_float4(<4 x float> noundef %a) {
+entry:
+  ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
+  ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 16, float [[ee0]])
+  ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
+  ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 16, float [[ee1]])
+  ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
+  ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 16, float [[ee2]])
+  ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
+  ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 16, float [[ee3]])
+  ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
+  ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
+  ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
+  ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3
+  %2 = call <4 x float> @llvm.asin.v4f32(<4 x float> %a) 
+  ret <4 x float> %2
+}
+
 declare half @llvm.asin.f16(half)
 declare float @llvm.asin.f32(float)
+declare <4 x float> @llvm.asin.v4f32(<4 x float>)
diff --git a/llvm/test/CodeGen/DirectX/atan.ll b/llvm/test/CodeGen/DirectX/atan.ll
index d7c4cd00e286a0..58899ab49bdb8e 100644
--- a/llvm/test/CodeGen/DirectX/atan.ll
+++ b/llvm/test/CodeGen/DirectX/atan.ll
@@ -1,20 +1,39 @@
-; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
 
 ; Make sure dxil operation function calls for atan are generated for float and half.
 
-define noundef float @tan_float(float noundef %a) {
+define noundef float @atan_float(float noundef %a) {
 entry:
 ; CHECK:call float @dx.op.unary.f32(i32 17, float %{{.*}})
   %elt.atan = call float @llvm.atan.f32(float %a)
   ret float %elt.atan
 }
 
-define noundef half @tan_half(half noundef %a) {
+define noundef half @atan_half(half noundef %a) {
 entry:
 ; CHECK:call half @dx.op.unary.f16(i32 17, half %{{.*}})
   %elt.atan = call half @llvm.atan.f16(half %a)
   ret half %elt.atan
 }
 
+define noundef <4 x float> @atan_float4(<4 x float> noundef %a) {
+entry:
+  ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
+  ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 17, float [[ee0]])
+  ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
+  ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 17, float [[ee1]])
+  ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
+  ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 17, float [[ee2]])
+  ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
+  ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 17, float [[ee3]])
+  ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
+  ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
+  ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
+  ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3
+  %2 = call <4 x float> @llvm.atan.v4f32(<4 x float> %a) 
+  ret <4 x float> %2
+}
+
 declare half @llvm.atan.f16(half)
 declare float @llvm.atan.f32(float)
+declare <4 x float> @llvm.atan.v4f32(<4 x float>) 
diff --git a/llvm/test/CodeGen/DirectX/ceil.ll b/llvm/test/CodeGen/DirectX/ceil.ll
index 48bc5495a8e051..bd6e747c2fbf5f 100644
--- a/llvm/test/CodeGen/DirectX/ceil.ll
+++ b/llvm/test/CodeGen/DirectX/ceil.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
 
 ; Make sure dxil operation function calls for ceil are generated for float and half.
 
@@ -16,5 +16,24 @@ entry:
   ret half %elt.ceil
 }
 
+define noundef <4 x float> @ceil_float4(<4 x float> noundef %a) {
+entry:
+  ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
+  ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 28, float [[ee0]])
+  ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
+  ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 28, float [[ee1]])
+  ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
+  ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 28, float [[ee2]])
+  ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
+  ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 28, float [[ee3]])
+  ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
+  ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
+  ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
+  ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3
+  %2 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %a) 
+  ret <4 x float> %2
+}
+
 declare half @llvm.ceil.f16(half)
 declare float @llvm.ceil.f32(float)
+declare <4 x float> @llvm.ceil.v4f32(<4 x float>) 
diff --git a/llvm/test/CodeGen/DirectX/cos.ll b/llvm/test/CodeGen/DirectX/cos.ll
index 72f4bfca23f9d5..85f5db25570b90 100644
--- a/llvm/test/CodeGen/DirectX/cos.ll
+++ b/llvm/test/CodeGen/DirectX/cos.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
 
 ; Make sure dxil operation function calls for cos are generated for float and half.
 
@@ -16,5 +16,24 @@ entry:
   ret half %elt.cos
 }
 
+define noundef <4 x float> @cos_float4(<4 x float> noundef %a) #0 {
+entry:
+  ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
+  ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 12, float [[ee0]])
+  ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
+  ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 12, float [[ee1]])
+  ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
+  ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 12, float [[ee2]])
+  ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
+  ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 12, float [[ee3]])
+  ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
+  ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
+  ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
+  ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3
+  %2 = call <4 x float> @llvm.cos.v4f32(<4 x float> %a) 
+  ret <4 x float> %2
+}
+
 declare half @llvm.cos.f16(half)
 declare float @llvm.cos.f32(float)
+declare <4 x float> @llvm.cos.v4f32(<4 x float>)
diff --git a/llvm/test/CodeGen/DirectX/cosh.ll b/llvm/test/CodeGen/DirectX/cosh.ll
index 91aaf893f3997c..670a8a3eae0864 100644
--- a/llvm/test/CodeGen/DirectX/cosh.ll
+++ b/llvm/test/CodeGen/DirectX/cosh.ll
@@ -1,20 +1,39 @@
-; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
 
 ; Make sure dxil operation function calls for cosh are generated for float and half.
 
-define noundef float @tan_float(float noundef %a) {
+define noundef float @cosh_float(float noundef %a) {
 entry:
 ; CHECK:call float @dx.op.unary.f32(i32 18, float %{{.*}})
   %elt.cosh = call float @llvm.cosh.f32(float %a)
   ret float %elt.cosh
 }
 
-define noundef half @tan_half(half noundef %a) {
+define noundef half @cosh_half(half noundef %a) {
 entry:
 ; CHECK:call half @dx.op.unary.f16(i32 18, half %{{.*}})
   %elt.cosh = call half @llvm.cosh.f16(half %a)
   ret half %elt.cosh
 }
 
+define noundef <4 x float> @cosh_float4(<4 x float> noundef %a) #0 {
+entry:
+  ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
+  ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 18, float [[ee0]])
+  ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
+  ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 18, float [[ee1]])
+  ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
+  ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 18, float [[ee2]])
+  ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
+  ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 18, float [[ee3]])
+  ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
+  ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
+  ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
+  ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3
+  %2 = call <4 x float> @llvm.cosh.v4f32(<4 x float> %a) 
+  ret <4 x float> %2
+}
+
 declare half @llvm.cosh.f16(half)
 declare float @llvm.cosh.f32(float)
+declare <4 x float> @llvm.cosh.v4f32(<4 x float>)
diff --git a/llvm/test/CodeGen/DirectX/exp2.ll b/llvm/test/CodeGen/DirectX/exp2.ll
index b70b87dedc4d1e..6d16af6a5413e0 100644
--- a/llvm/test/CodeGen/DirectX/exp2.ll
+++ b/llvm/test/CodeGen/DirectX/exp2.ll
@@ -1,31 +1,39 @@
-; RUN: opt -S -dxil-op-lower < %s | FileCheck %s
+; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.7-library %s | FileCheck %s
 
 ; Make sure dxil operation function calls for exp2 are generated for float and half.
-; CHECK:call float @dx.op.unary.f32(i32 21, float %{{.*}})
-; CHECK:call half @dx.op.unary.f16(i32 21, half %{{.*}})
 
-target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64"
-target triple = "dxil-pc-shadermodel6.7-library"
-
-; Function Attrs: noinline nounwind optnone
-define noundef float @exp2_float(float noundef %a) #0 {
+define noundef float @exp2_float(float noundef %a) {
 entry:
-  %a.addr = alloca float, align 4
-  store float %a, ptr %a.addr, align 4
-  %0 = load float, ptr %a.addr, align 4
-  %elt.exp2 = call float @llvm.exp2.f32(float %0)
+  ; CHECK:call float @dx.op.unary.f32(i32 21, float %{{.*}})
+  %elt.exp2 = call float @llvm.exp2.f32(float %a)
   ret float %elt.exp2
 }
 
-; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn
-declare float @llvm.exp2.f32(float) #1
-
-; Function Attrs: noinline nounwind optnone
-define noundef half @exp2_half(half noundef %a) #0 {
+define noundef half @exp2_half(half noundef %a) {
 entry:
-  %a.addr = alloca half, align 2
-  store half %a, ptr %a.addr, align 2
-  %0 = load half, ptr %a.addr, align ...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/107427