[llvm] [DirectX] Add support to lower LLVM intrinsics ceil, cos, fabs, floor and smax to DXIL Ops. (PR #78767)

Fri Jan 19 11:34:31 PST 2024

llvmbot wrote:




@llvm/pr-subscribers-backend-directx

Author: S. Bharadwaj Yadavalli (bharadwajy)

<details>
<summary>Changes</summary>

Also add a DXILStrengthReduce pass that rewrites LLVM IR in preparation for DXIL lowering. The rewriting of FNeg is moved from DXILPrepareModule (DXILPrepare.cpp) into this pass, and the pass additionally rewrites the abs intrinsic.

Add tests for each of the instructions being newly lowered and rewritten.

---

Patch is 29.80 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/78767.diff


13 Files Affected:

- (modified) llvm/lib/Target/DirectX/CMakeLists.txt (+1) 
- (modified) llvm/lib/Target/DirectX/DXIL.td (+71-23) 
- (modified) llvm/lib/Target/DirectX/DXILOpLowering.cpp (+2) 
- (modified) llvm/lib/Target/DirectX/DXILPrepare.cpp (+3-12) 
- (added) llvm/lib/Target/DirectX/DXILStrengthReduce.cpp (+135) 
- (modified) llvm/lib/Target/DirectX/DirectX.h (+6) 
- (modified) llvm/lib/Target/DirectX/DirectXTargetMachine.cpp (+2) 
- (added) llvm/test/CodeGen/DirectX/abs.ll (+69) 
- (added) llvm/test/CodeGen/DirectX/ceil.ll (+42) 
- (added) llvm/test/CodeGen/DirectX/cos.ll (+43) 
- (added) llvm/test/CodeGen/DirectX/fabs.ll (+73) 
- (added) llvm/test/CodeGen/DirectX/floor.ll (+42) 
- (added) llvm/test/CodeGen/DirectX/smax.ll (+30) 


``````````diff

diff --git a/llvm/lib/Target/DirectX/CMakeLists.txt b/llvm/lib/Target/DirectX/CMakeLists.txt
index bf93280779bf8be..ef49fc9fa94491f 100644
--- a/llvm/lib/Target/DirectX/CMakeLists.txt
+++ b/llvm/lib/Target/DirectX/CMakeLists.txt
@@ -27,6 +27,7 @@ add_llvm_target(DirectXCodeGen
   DXILResource.cpp
   DXILResourceAnalysis.cpp
   DXILShaderFlags.cpp
+  DXILStrengthReduce.cpp
   DXILTranslateMetadata.cpp
 
   LINK_COMPONENTS
diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index 709279889653b8d..c71464561fafe01 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -28,6 +28,7 @@ def ThreadIdClass : dxil_class<"ThreadId">;
 def GroupIdClass : dxil_class<"GroupId">;
 
 def binary_uint : dxil_category<"Binary uint">;
+def binary_int : dxil_category<"Binary int">;
 def unary_float : dxil_category<"Unary float">;
 def ComputeID : dxil_category<"Compute/Mesh/Amplification shader">;
 
@@ -86,31 +87,78 @@ class dxil_op<string name, int code_id, dxil_class code_class, dxil_category op_
   let stats_group = _stats_group;
 }
 
-// The intrinsic which map directly to this dxil op.
+// Intrinsic that maps directly to this dxil op.
 class dxil_map_intrinsic<Intrinsic llvm_intrinsic_> { Intrinsic llvm_intrinsic = llvm_intrinsic_; }
 
-def Sin : dxil_op<"Sin", 13, Unary, unary_float, "returns sine(theta) for theta in radians.",
-  "half;float;", "rn",
-  [
-    dxil_param<0, "$o", "", "operation result">,
-    dxil_param<1, "i32", "opcode", "DXIL opcode">,
-    dxil_param<2, "$o", "value", "input value">
-  ],
-  ["floats"]>,
-  dxil_map_intrinsic<int_sin>;
-
-def UMax :dxil_op< "UMax", 39,  Binary,  binary_uint, "unsigned integer maximum. UMax(a,b) = a > b ? a : b",
-    "i16;i32;i64;",  "rn",
-  [
-    dxil_param<0,  "$o",  "",  "operation result">,
-    dxil_param<1,  "i32",  "opcode",  "DXIL opcode">,
-    dxil_param<2,  "$o",  "a",  "input value">,
-    dxil_param<3,  "$o",  "b",  "input value">
-  ],
-  ["uints"]>,
-  dxil_map_intrinsic<int_umax>;
-
-def ThreadId :dxil_op< "ThreadId", 93,  ThreadIdClass, ComputeID, "reads the thread ID", "i32;",  "rn",
+def Fabs : dxil_op<"Fabs", 6, Unary, unary_float,
+                              "returns the absolute value of the input value.", 
+                              "half;float;double;", "rn",
+                              [
+                               dxil_param<0, "$o", "", "operation result">,
+                               dxil_param<1, "i32", "opcode", "DXIL opcode">,
+                               dxil_param<2, "$o", "value", "input value">
+                              ], ["floats"]>,
+                              dxil_map_intrinsic<int_fabs>;
+def Cos : dxil_op<"Cos", 12, Unary, unary_float,
+                              "returns cosine(theta) for theta in radians.", 
+                              "half;float;double;", "rn",
+                              [
+                               dxil_param<0, "$o", "", "operation result">,
+                               dxil_param<1, "i32", "opcode", "DXIL opcode">,
+                               dxil_param<2, "$o", "value", "input value">
+                              ], ["floats"]>,
+                              dxil_map_intrinsic<int_cos>;
+def Sin : dxil_op<"Sin", 13, Unary, unary_float,
+                              "returns sine(theta) for theta in radians.", 
+                              "half;float;double;", "rn",
+                              [
+                               dxil_param<0, "$o", "", "operation result">,
+                               dxil_param<1, "i32", "opcode", "DXIL opcode">,
+                               dxil_param<2, "$o", "value", "input value">
+                              ], ["floats"]>,
+                              dxil_map_intrinsic<int_sin>;
+
+def Round_ni : dxil_op<"Round_ni", 27, Unary, unary_float,
+                         "rounds towards -INF, commonly known as floor()", "float;double;",
+                         "rn",
+                         [
+                           dxil_param<0, "$o", "", "operation result">,
+                           dxil_param<1, "i32", "opcode", "DXIL opcode">,
+                           dxil_param<2, "$o", "value", "input value">
+                         ], ["floats"]>,
+                         dxil_map_intrinsic<int_floor>;
+
+// Maps llvm.ceil to the DXIL op Round_pi (opcode 28).
+def Round_pi : dxil_op<"Round_pi", 28, Unary, unary_float,
+                         "rounds towards +INF, commonly known as ceil()", "float;double;", "rn",
+                         [
+                          dxil_param<0, "$o", "", "operation result">,
+                          dxil_param<1, "i32", "opcode", "DXIL opcode">,
+                          dxil_param<2, "$o", "value", "input value">
+                         ], ["floats"]>, dxil_map_intrinsic<int_ceil>;
+
+def IMax : dxil_op<"IMax", 37, Binary, binary_int,
+                               "IMax(a,b) returns a if a > b, else b", "i32;i64;", "rn",
+                                [
+                                  dxil_param<0,  "$o",  "",  "operation result">,
+                                  dxil_param<1,  "i32",  "opcode",  "DXIL opcode">,
+                                  dxil_param<2,  "$o",  "a",  "input value">,
+                                  dxil_param<3,  "$o",  "b",  "input value">
+                                ], ["ints"]>,
+                                dxil_map_intrinsic<int_smax>;
+
+def UMax : dxil_op<"UMax", 39, Binary, binary_uint,
+                               "unsigned integer maximum. UMax(a,b) = a > b ? a : b", "i32;i64;", "rn",
+                               [
+                                  dxil_param<0,  "$o",  "",  "operation result">,
+                                  dxil_param<1,  "i32",  "opcode",  "DXIL opcode">,
+                                  dxil_param<2,  "$o",  "a",  "input value">,
+                                  dxil_param<3,  "$o",  "b",  "input value">
+                                ], ["uints"]>,
+                               dxil_map_intrinsic<int_umax>;
+
+// ThreadID and GroupId
+def ThreadId : dxil_op< "ThreadId", 93,  ThreadIdClass, ComputeID, "reads the thread ID", "i32;",  "rn",
   [
     dxil_param<0,  "i32",  "",  "thread ID component">,
     dxil_param<1,  "i32",  "opcode",  "DXIL opcode">,
diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
index f6e2297e9af41fc..cbfd65e27983f3d 100644
--- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
@@ -45,6 +45,8 @@ static void lowerIntrinsic(dxil::OpCode DXILOp, Function &F, Module &M) {
     Args.append(CI->arg_begin(), CI->arg_end());
     B.SetInsertPoint(CI);
     CallInst *DXILCI = DXILB.createDXILOpCall(DXILOp, OverloadTy, CI->args());
+    // Retain tail call property
+    DXILCI->setTailCall(CI->isTailCall());
 
     CI->replaceAllUsesWith(DXILCI);
     CI->eraseFromParent();
diff --git a/llvm/lib/Target/DirectX/DXILPrepare.cpp b/llvm/lib/Target/DirectX/DXILPrepare.cpp
index 026911946b47f00..aa0012114548a09 100644
--- a/llvm/lib/Target/DirectX/DXILPrepare.cpp
+++ b/llvm/lib/Target/DirectX/DXILPrepare.cpp
@@ -6,7 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 ///
-/// \file This file contains pases and utilities to convert a modern LLVM
+/// \file This file contains passes and utilities to convert a modern LLVM
 /// module into a module compatible with the LLVM 3.7-based DirectX Intermediate
 /// Language (DXIL).
 //===----------------------------------------------------------------------===//
@@ -119,17 +119,8 @@ class DXILPrepareModule : public ModulePass {
       for (auto &BB : F) {
         IRBuilder<> Builder(&BB);
         for (auto &I : make_early_inc_range(BB)) {
-          if (I.getOpcode() == Instruction::FNeg) {
-            Builder.SetInsertPoint(&I);
-            Value *In = I.getOperand(0);
-            Value *Zero = ConstantFP::get(In->getType(), -0.0);
-            I.replaceAllUsesWith(Builder.CreateFSub(Zero, In));
-            I.eraseFromParent();
-            continue;
-          }
-
-          // Emtting NoOp bitcast instructions allows the ValueEnumerator to be
-          // unmodified as it reserves instruction IDs during contruction.
+          // Emitting NoOp bitcast instructions allows the ValueEnumerator to be
+          // unmodified as it reserves instruction IDs during construction.
           if (auto LI = dyn_cast<LoadInst>(&I)) {
             if (Value *NoOpBitcast = maybeGenerateBitcast(
                     Builder, PointerTypes, I, LI->getPointerOperand(),
diff --git a/llvm/lib/Target/DirectX/DXILStrengthReduce.cpp b/llvm/lib/Target/DirectX/DXILStrengthReduce.cpp
new file mode 100644
index 000000000000000..55ccad5f539b367
--- /dev/null
+++ b/llvm/lib/Target/DirectX/DXILStrengthReduce.cpp
@@ -0,0 +1,135 @@
+//===- DXILStrengthReduce.cpp - Prepare LLVM Module for DXIL encoding------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file This file contains strength reduction pass to convert a modern LLVM
+/// module into an LLVM module with LLVM intrinsics amenable for lowering to
+/// LLVM 3.7-based DirectX Intermediate Language (DXIL).
+//===----------------------------------------------------------------------===//
+
+#include "DirectX.h"
+#include "DirectXIRPasses/PointerTypeAnalysis.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/AttributeMask.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Compiler.h"
+
+#define DEBUG_TYPE "dxil-strength-reduce"
+
+using namespace llvm;
+using namespace llvm::dxil;
+
+namespace {
+/// Module pass that rewrites IR with no direct DXIL Op counterpart into
+/// equivalent IR that later passes can lower: integer llvm.abs calls become
+/// smax(x, 0 - x), and fneg becomes fsub -0.0, x.
+class DXILStrengthReduce : public ModulePass {
+public:
+  bool runOnModule(Module &M) override {
+    bool Changed = false;
+    for (auto &F : make_early_inc_range(M.functions())) {
+      IRBuilder<> IRB(M.getContext());
+      // Reduce strength of LLVM intrinsics
+      // Flag to indicate if the intrinsic has been replaced. This ensures any
+      // other functions with no uses are not deleted in this pass.
+      bool IntrinsicReplaced = false;
+      if (F.isDeclaration()) {
+        Intrinsic::ID IntrinsicId = F.getIntrinsicID();
+        // Convert
+        //    %ret = call i32 @llvm.abs.i32(i32 %arg, i1 false)
+        // to
+        //    %NegArg = sub i32 0, %arg
+        //    %ret = call i32 @llvm.smax.i32(i32 %arg, i32 %NegArg)
+        if (IntrinsicId == Intrinsic::abs) {
+          // Get to uses of the intrinsic
+          for (User *U : make_early_inc_range(F.users())) {
+            auto *IntrinsicCall = dyn_cast<CallInst>(U);
+            if (!IntrinsicCall)
+              continue;
+            Value *Input = IntrinsicCall->getOperand(0);
+
+            // The second argument is an immarg i1 flag: when true, abs of
+            // INT_MIN is poison. cast<> asserts that it is a ConstantInt.
+            bool IsIntMinPoison =
+                cast<ConstantInt>(IntrinsicCall->getOperand(1))->isOne();
+
+            // Construct the Instruction sub(0, Input). Only nsw may be set:
+            // 0 - INT_MIN is the sole signed overflow, so nsw yields poison
+            // exactly when abs does. nuw would make the negation poison for
+            // every non-zero input, so it must never be set.
+            Value *ZeroValue = ConstantInt::get(Input->getType(), 0);
+            IRB.SetInsertPoint(IntrinsicCall);
+            auto *SubInst = IRB.CreateSub(
+                ZeroValue, Input, "NegArg", /*HasNUW=*/false, IsIntMinPoison);
+
+            // Replace
+            //   call i32 @llvm.abs.i32(i32 %arg, i1 false)
+            // with
+            //   call i32 @llvm.smax.i32(i32 %arg, i32 %NegArg)
+            Value *IntrinsicCallArgs[] = {Input, SubInst};
+            auto *IMaxCall = IRB.CreateIntrinsic(
+                Input->getType(), Intrinsic::smax,
+                ArrayRef<Value *>(IntrinsicCallArgs), nullptr, "IMax");
+            // Retain the tail call and attributes of the intrinsic being
+            // replaced.
+            IMaxCall->setTailCall(IntrinsicCall->isTailCall());
+            IMaxCall->setAttributes(IntrinsicCall->getAttributes());
+            IntrinsicCall->replaceAllUsesWith(IMaxCall);
+            IntrinsicCall->eraseFromParent();
+            IntrinsicReplaced = true;
+          }
+        }
+        Changed |= IntrinsicReplaced;
+        if (F.user_empty() && IntrinsicReplaced)
+          F.eraseFromParent();
+
+      } else {
+        // Reduce strength of instructions
+        for (auto &BB : F) {
+          IRBuilder<> Builder(&BB);
+          for (auto &I : make_early_inc_range(BB)) {
+            // Rewrite
+            //    %nval = fneg double %val
+            // to
+            //    %nval = fsub double -0.000000e+00, %val
+            if (I.getOpcode() == Instruction::FNeg) {
+              Builder.SetInsertPoint(&I);
+              Value *In = I.getOperand(0);
+              Value *Zero = ConstantFP::get(In->getType(), -0.0);
+              I.replaceAllUsesWith(Builder.CreateFSub(Zero, In));
+              I.eraseFromParent();
+              Changed = true;
+            }
+          }
+        }
+      }
+    }
+    return Changed;
+  }
+
+  DXILStrengthReduce() : ModulePass(ID) {}
+
+  static char ID; // Pass identification.
+};
+char DXILStrengthReduce::ID = 0;
+
+} // end anonymous namespace
+
+INITIALIZE_PASS_BEGIN(DXILStrengthReduce, DEBUG_TYPE, "DXIL Strength Reduce",
+                      false, false)
+INITIALIZE_PASS_END(DXILStrengthReduce, DEBUG_TYPE, "DXIL Strength Reduce",
+                    false, false)
+
+ModulePass *llvm::createDXILStrengthReducePass() {
+  return new DXILStrengthReduce();
+}
diff --git a/llvm/lib/Target/DirectX/DirectX.h b/llvm/lib/Target/DirectX/DirectX.h
index eaecc3ac280c4cc..51f2791ba9cc0b0 100644
--- a/llvm/lib/Target/DirectX/DirectX.h
+++ b/llvm/lib/Target/DirectX/DirectX.h
@@ -28,6 +28,12 @@ void initializeDXILPrepareModulePass(PassRegistry &);
 /// Pass to convert modules into DXIL-compatable modules
 ModulePass *createDXILPrepareModulePass();
 
+/// Initializer for DXIL strength reduce
+void initializeDXILStrengthReducePass(PassRegistry &);
+
+/// Pass to reduce strength during lowering into DXIL-compatible modules
+ModulePass *createDXILStrengthReducePass();
+
 /// Initializer for DXILOpLowering
 void initializeDXILOpLoweringLegacyPass(PassRegistry &);
 
diff --git a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
index 06938f8c74f155e..2cd8f049e011d2c 100644
--- a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
+++ b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
@@ -39,6 +39,7 @@ using namespace llvm;
 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeDirectXTarget() {
   RegisterTargetMachine<DirectXTargetMachine> X(getTheDirectXTarget());
   auto *PR = PassRegistry::getPassRegistry();
+  initializeDXILStrengthReducePass(*PR);
   initializeDXILPrepareModulePass(*PR);
   initializeEmbedDXILPassPass(*PR);
   initializeWriteDXILPassPass(*PR);
@@ -76,6 +77,7 @@ class DirectXPassConfig : public TargetPassConfig {
 
   FunctionPass *createTargetRegisterAllocator(bool) override { return nullptr; }
   void addCodeGenPrepare() override {
+    addPass(createDXILStrengthReducePass());
     addPass(createDXILOpLoweringLegacyPass());
     addPass(createDXILPrepareModulePass());
     addPass(createDXILTranslateMetadataPass());
diff --git a/llvm/test/CodeGen/DirectX/abs.ll b/llvm/test/CodeGen/DirectX/abs.ll
new file mode 100644
index 000000000000000..fa6b33ad749728e
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/abs.ll
@@ -0,0 +1,69 @@
+; Make sure dxil operation function calls for abs are appropriately strength reduced for int and int64_t.
+; RUN: opt -S -dxil-strength-reduce < %s | FileCheck %s -check-prefix=TEST_SR
+
+; Make sure output of strength reduction pass is lowered to DXIL code as expected.
+; RUN: opt -S -dxil-strength-reduce -dxil-op-lower < %s | FileCheck %s -check-prefix=TEST_SR_OP_LOWER
+
+
+target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64"
+target triple = "dxil-unknown-shadermodel6.5-compute"
+
+@"?a@@3HA" = local_unnamed_addr global i32 0, align 4
+@"?b@@3HA" = local_unnamed_addr global i32 0, align 4
+@"?c@@3JA" = local_unnamed_addr global i64 0, align 8
+@"?d@@3JA" = local_unnamed_addr global i64 0, align 8
+
+; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(readwrite, argmem: none, inaccessiblemem: none)
+define void @"?test_i32@@YAXXZ"() local_unnamed_addr #1 {
+entry:
+  %0 = load i32, ptr @"?b@@3HA", align 4, !tbaa !4
+  ; TEST_SR:%NegArg = sub i32 0, %0
+  ; TEST_SR-NEXT: %IMax = tail call i32 @llvm.smax.i32(i32 %0, i32 %NegArg)
+  ; TEST_SR_OP_LOWER: %NegArg = sub i32 0, %0
+  ; TEST_SR_OP_LOWER-NEXT:%1 = tail call i32 @dx.op.binary.i32(i32 37, i32 %0, i32 %NegArg)
+  %elt.abs = tail call i32 @llvm.abs.i32(i32 %0, i1 false)
+  ; TEST_SR: store i32 %IMax, ptr @"?a@@3HA", align 4, !tbaa !4
+  ; TEST_SR_OP_LOWER: store i32 %1, ptr @"?a@@3HA", align 4, !tbaa !4
+  store i32 %elt.abs, ptr @"?a@@3HA", align 4, !tbaa !4
+  ret void
+}
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare i32 @llvm.abs.i32(i32, i1 immarg) #2
+
+; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(readwrite, argmem: none, inaccessiblemem: none)
+define void @"?test_i64@@YAXI@Z"(i32 noundef %GI) local_unnamed_addr #1 {
+entry:
+  %0 = load i64, ptr @"?d@@3JA", align 8, !tbaa !8
+  ; TEST_SR: %NegArg = sub i64 0, %0
+  ; TEST_SR-NEXT: %IMax = tail call i64 @llvm.smax.i64(i64 %0, i64 %NegArg)
+  ; TEST_SR_OP_LOWER: %NegArg = sub i64 0, %0
+  ; TEST_SR_OP_LOWER-NEXT: %1 = tail call i64 @dx.op.binary.i64(i32 37, i64 %0, i64 %NegArg)
+  %elt.abs = tail call i64 @llvm.abs.i64(i64 %0, i1 false)
+  ; TEST_SR: store i64 %IMax, ptr @"?c@@3JA", align 8, !tbaa !8
+  ; TEST_SR_OP_LOWER: store i64 %1, ptr @"?c@@3JA", align 8, !tbaa !8
+  store i64 %elt.abs, ptr @"?c@@3JA", align 8, !tbaa !8
+  ret void
+}
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare i64 @llvm.abs.i64(i64, i1 immarg) #2
+
+attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "frame-pointer"="all" "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #1 = { mustprogress nofree norecurse nosync nounwind willreturn memory(readwrite, argmem: none, inaccessiblemem: none) "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #2 = { mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+
+!llvm.module.flags = !{!0, !1}
+!dx.valver = !{!2}
+!llvm.ident = !{!3}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 7, !"frame-pointer", i32 2}
+!2 = !{i32 1, i32 7}
+!3 = !{!"clang version 18.0.0git (git@github.com:somefork/llvm-project.git someSHA)"}
+!4 = !{!5, !5, i64 0}
+!5 = !{!"int", !6, i64 0}
+!6 = !{!"omnipotent char", !7, i64 0}
+!7 = !{!"Simple C++ TBAA"}
+!8 = !{!9, !9, i64 0}
+!9 = !{!"long", !6, i64 0}
\ No newline at end of file
diff --git a/llvm/test/CodeGen/DirectX/ceil.ll b/llvm/test/CodeGen/DirectX/ceil.ll
new file mode 100644
index 000000000000000..3b1835386f28db2
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/ceil.ll
@@ -0,0 +1,42 @@
+; RUN: opt -S -dxil-op-lower < %s | FileCheck %s
+
+; Make sure dxil operation function calls for ceil are generated for float and double.
+
+target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64"
+target triple = "dxil-pc-shadermodel6.7-library"
+
+; Function Attrs: noinline nounwind optnone
+define noundef float @_Z3foof(float noundef %a) #0 {
+entry:
+  %a.addr = alloca float, align 4
+  store float %a, ptr %a.addr, align 4
+  %0 = load float, ptr %a.addr, align 4
+  ; CHECK:call float @dx.op.unary.f32(i32 28, floa...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/78767