[llvm] [Scalarizer][DirectX] Add support for scalarization of Target intrinsics (PR #108776)
Farzon Lotfi via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 16 13:22:13 PDT 2024
https://github.com/farzonl updated https://github.com/llvm/llvm-project/pull/108776
>From 64f8cf1905a1e3d3fb76fa9b4d494110e0bc4b3e Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <farzon at farzon.org>
Date: Fri, 13 Sep 2024 14:21:18 -0400
Subject: [PATCH 1/2] [DirectX] Add support for scalarization of Target
intrinsics
---
.../llvm/Analysis/TargetTransformInfo.h | 7 ++-
.../llvm/Analysis/TargetTransformInfoImpl.h | 4 ++
llvm/include/llvm/CodeGen/BasicTTIImpl.h | 4 ++
llvm/lib/Analysis/TargetTransformInfo.cpp | 5 ++
llvm/lib/Target/DirectX/CMakeLists.txt | 1 +
.../DirectX/DirectXTargetTransformInfo.cpp | 25 ++++++++++
.../DirectX/DirectXTargetTransformInfo.h | 1 +
llvm/lib/Transforms/Scalar/Scalarizer.cpp | 31 ++++++++----
llvm/test/CodeGen/DirectX/frac.ll | 48 +++++++++++--------
llvm/test/CodeGen/DirectX/rsqrt.ll | 36 +++++++++-----
10 files changed, 121 insertions(+), 41 deletions(-)
create mode 100644 llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index b2124c6106198e..3411163549de2f 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -882,6 +882,8 @@ class TargetTransformInfo {
/// should use coldcc calling convention.
bool useColdCCForColdCall(Function &F) const;
+ bool isTargetIntrinsicTriviallyScalarizable(Intrinsic::ID ID) const;
+
/// Estimate the overhead of scalarizing an instruction. Insert and Extract
/// are set if the demanded result elements need to be inserted and/or
/// extracted from vectors.
@@ -1928,6 +1930,7 @@ class TargetTransformInfo::Concept {
virtual bool shouldBuildLookupTablesForConstant(Constant *C) = 0;
virtual bool shouldBuildRelLookupTables() = 0;
virtual bool useColdCCForColdCall(Function &F) = 0;
+ virtual bool isTargetIntrinsicTriviallyScalarizable(Intrinsic::ID ID) = 0;
virtual InstructionCost getScalarizationOverhead(VectorType *Ty,
const APInt &DemandedElts,
bool Insert, bool Extract,
@@ -2467,7 +2470,9 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
bool useColdCCForColdCall(Function &F) override {
return Impl.useColdCCForColdCall(F);
}
-
+ bool isTargetIntrinsicTriviallyScalarizable(Intrinsic::ID ID) override {
+ return Impl.isTargetIntrinsicTriviallyScalarizable(ID);
+ }
InstructionCost getScalarizationOverhead(VectorType *Ty,
const APInt &DemandedElts,
bool Insert, bool Extract,
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 90eef93a2a54d5..2819af30cd1704 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -373,6 +373,10 @@ class TargetTransformInfoImplBase {
bool useColdCCForColdCall(Function &F) const { return false; }
+ bool isTargetIntrinsicTriviallyScalarizable(Intrinsic::ID ID) const {
+ return false;
+ }
+
InstructionCost getScalarizationOverhead(VectorType *Ty,
const APInt &DemandedElts,
bool Insert, bool Extract,
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index caa3a57ebabc2e..2f2a6a09ffc44d 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -789,6 +789,10 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
return Cost;
}
+ bool isTargetIntrinsicTriviallyScalarizable(Intrinsic::ID ID) const {
+ return false;
+ }
+
/// Helper wrapper for the DemandedElts variant of getScalarizationOverhead.
InstructionCost getScalarizationOverhead(VectorType *InTy, bool Insert,
bool Extract,
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 2c26493bd3f1ca..67b626f300a101 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -587,6 +587,11 @@ bool TargetTransformInfo::useColdCCForColdCall(Function &F) const {
return TTIImpl->useColdCCForColdCall(F);
}
+bool TargetTransformInfo::isTargetIntrinsicTriviallyScalarizable(
+ Intrinsic::ID ID) const {
+ return TTIImpl->isTargetIntrinsicTriviallyScalarizable(ID);
+}
+
InstructionCost TargetTransformInfo::getScalarizationOverhead(
VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract,
TTI::TargetCostKind CostKind) const {
diff --git a/llvm/lib/Target/DirectX/CMakeLists.txt b/llvm/lib/Target/DirectX/CMakeLists.txt
index f7ae09957996b5..a9c5d81391b8d7 100644
--- a/llvm/lib/Target/DirectX/CMakeLists.txt
+++ b/llvm/lib/Target/DirectX/CMakeLists.txt
@@ -18,6 +18,7 @@ add_llvm_target(DirectXCodeGen
DirectXRegisterInfo.cpp
DirectXSubtarget.cpp
DirectXTargetMachine.cpp
+ DirectXTargetTransformInfo.cpp
DXContainerGlobals.cpp
DXILFinalizeLinkage.cpp
DXILIntrinsicExpansion.cpp
diff --git a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp
new file mode 100644
index 00000000000000..1a59f04b214042
--- /dev/null
+++ b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp
@@ -0,0 +1,25 @@
+//===- DirectXTargetTransformInfo.cpp - DirectX TTI -------------*- C++ -*-===//
+//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file Implements the DirectX-specific TargetTransformInfo (TTI) hooks.
+//===----------------------------------------------------------------------===//
+
+#include "DirectXTargetTransformInfo.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsDirectX.h"
+
+bool llvm::DirectXTTIImpl::isTargetIntrinsicTriviallyScalarizable(
+ Intrinsic::ID ID) const {
+ switch (ID) {
+ case Intrinsic::dx_frac:
+ case Intrinsic::dx_rsqrt:
+ return true;
+ default:
+ return false;
+ }
+}
diff --git a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.h b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.h
index ed98355fad002d..48414549f83495 100644
--- a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.h
+++ b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.h
@@ -34,6 +34,7 @@ class DirectXTTIImpl : public BasicTTIImplBase<DirectXTTIImpl> {
: BaseT(TM, F.getDataLayout()), ST(TM->getSubtargetImpl(F)),
TLI(ST->getTargetLowering()) {}
unsigned getMinVectorRegisterBitWidth() const { return 32; }
+ bool isTargetIntrinsicTriviallyScalarizable(Intrinsic::ID ID) const;
};
} // namespace llvm
diff --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
index 01d24335df2262..e4ec09aa6219ea 100644
--- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/BasicBlock.h"
@@ -32,6 +33,7 @@
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsDirectX.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
@@ -281,10 +283,11 @@ T getWithDefaultOverride(const cl::opt<T> &ClOption,
class ScalarizerVisitor : public InstVisitor<ScalarizerVisitor, bool> {
public:
- ScalarizerVisitor(DominatorTree *DT, ScalarizerPassOptions Options)
- : DT(DT), ScalarizeVariableInsertExtract(getWithDefaultOverride(
- ClScalarizeVariableInsertExtract,
- Options.ScalarizeVariableInsertExtract)),
+ ScalarizerVisitor(DominatorTree *DT, const TargetTransformInfo *TTI,
+ ScalarizerPassOptions Options)
+ : DT(DT), TTI(TTI), ScalarizeVariableInsertExtract(getWithDefaultOverride(
+ ClScalarizeVariableInsertExtract,
+ Options.ScalarizeVariableInsertExtract)),
ScalarizeLoadStore(getWithDefaultOverride(ClScalarizeLoadStore,
Options.ScalarizeLoadStore)),
ScalarizeMinBits(getWithDefaultOverride(ClScalarizeMinBits,
@@ -292,6 +295,8 @@ class ScalarizerVisitor : public InstVisitor<ScalarizerVisitor, bool> {
bool visit(Function &F);
+ bool isTriviallyScalarizable(Intrinsic::ID ID);
+
// InstVisitor methods. They return true if the instruction was scalarized,
// false if nothing changed.
bool visitInstruction(Instruction &I) { return false; }
@@ -335,6 +340,7 @@ class ScalarizerVisitor : public InstVisitor<ScalarizerVisitor, bool> {
SmallVector<WeakTrackingVH, 32> PotentiallyDeadInstrs;
DominatorTree *DT;
+ const TargetTransformInfo *TTI;
const bool ScalarizeVariableInsertExtract;
const bool ScalarizeLoadStore;
@@ -358,6 +364,7 @@ ScalarizerLegacyPass::ScalarizerLegacyPass(const ScalarizerPassOptions &Options)
void ScalarizerLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
}
@@ -445,7 +452,9 @@ bool ScalarizerLegacyPass::runOnFunction(Function &F) {
return false;
DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- ScalarizerVisitor Impl(DT, Options);
+ const TargetTransformInfo *TTI =
+ &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ ScalarizerVisitor Impl(DT, TTI, Options);
return Impl.visit(F);
}
@@ -689,8 +698,10 @@ bool ScalarizerVisitor::splitBinary(Instruction &I, const Splitter &Split) {
return true;
}
-static bool isTriviallyScalariable(Intrinsic::ID ID) {
- return isTriviallyVectorizable(ID);
+bool ScalarizerVisitor::isTriviallyScalarizable(Intrinsic::ID ID) {
+
+ return TTI->isTargetIntrinsicTriviallyScalarizable(ID) ||
+ isTriviallyVectorizable(ID);
}
/// If a call to a vector typed intrinsic function, split into a scalar call per
@@ -705,7 +716,8 @@ bool ScalarizerVisitor::splitCall(CallInst &CI) {
return false;
Intrinsic::ID ID = F->getIntrinsicID();
- if (ID == Intrinsic::not_intrinsic || !isTriviallyScalariable(ID))
+
+ if (ID == Intrinsic::not_intrinsic || !isTriviallyScalarizable(ID))
return false;
// unsigned NumElems = VT->getNumElements();
@@ -1249,7 +1261,8 @@ bool ScalarizerVisitor::finish() {
PreservedAnalyses ScalarizerPass::run(Function &F, FunctionAnalysisManager &AM) {
DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F);
- ScalarizerVisitor Impl(DT, Options);
+ const TargetTransformInfo *TTI = &AM.getResult<TargetIRAnalysis>(F);
+ ScalarizerVisitor Impl(DT, TTI, Options);
bool Changed = Impl.visit(F);
PreservedAnalyses PA;
PA.preserve<DominatorTreeAnalysis>();
diff --git a/llvm/test/CodeGen/DirectX/frac.ll b/llvm/test/CodeGen/DirectX/frac.ll
index ae86fe06654da1..7a2c37b9a477f7 100644
--- a/llvm/test/CodeGen/DirectX/frac.ll
+++ b/llvm/test/CodeGen/DirectX/frac.ll
@@ -1,31 +1,39 @@
-; RUN: opt -S -dxil-op-lower < %s | FileCheck %s
+; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
; Make sure dxil operation function calls for frac are generated for float and half.
-; CHECK:call float @dx.op.unary.f32(i32 22, float %{{.*}})
-; CHECK:call half @dx.op.unary.f16(i32 22, half %{{.*}})
-target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64"
-target triple = "dxil-pc-shadermodel6.7-library"
+define noundef half @frac_half(half noundef %a) {
+entry:
+ ; CHECK:call half @dx.op.unary.f16(i32 22, half %{{.*}})
+ %dx.frac = call half @llvm.dx.frac.f16(half %a)
+ ret half %dx.frac
+}
-; Function Attrs: noinline nounwind optnone
define noundef float @frac_float(float noundef %a) #0 {
entry:
- %a.addr = alloca float, align 4
- store float %a, ptr %a.addr, align 4
- %0 = load float, ptr %a.addr, align 4
- %dx.frac = call float @llvm.dx.frac.f32(float %0)
+ ; CHECK:call float @dx.op.unary.f32(i32 22, float %{{.*}})
+ %dx.frac = call float @llvm.dx.frac.f32(float %a)
ret float %dx.frac
}
-; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn
-declare float @llvm.dx.frac.f32(float) #1
-
-; Function Attrs: noinline nounwind optnone
-define noundef half @frac_half(half noundef %a) #0 {
+define noundef <4 x float> @frac_float4(<4 x float> noundef %a) #0 {
entry:
- %a.addr = alloca half, align 2
- store half %a, ptr %a.addr, align 2
- %0 = load half, ptr %a.addr, align 2
- %dx.frac = call half @llvm.dx.frac.f16(half %0)
- ret half %dx.frac
+ ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
+ ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 22, float [[ee0]])
+ ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
+ ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 22, float [[ee1]])
+ ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
+ ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 22, float [[ee2]])
+ ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
+ ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 22, float [[ee3]])
+ ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3
+ %2 = call <4 x float> @llvm.dx.frac.v4f32(<4 x float> %a)
+ ret <4 x float> %2
}
+
+declare half @llvm.dx.frac.f16(half)
+declare float @llvm.dx.frac.f32(float)
+declare <4 x float> @llvm.dx.frac.v4f32(<4 x float>)
\ No newline at end of file
diff --git a/llvm/test/CodeGen/DirectX/rsqrt.ll b/llvm/test/CodeGen/DirectX/rsqrt.ll
index 054c84483ef826..d7d85d377b127b 100644
--- a/llvm/test/CodeGen/DirectX/rsqrt.ll
+++ b/llvm/test/CodeGen/DirectX/rsqrt.ll
@@ -1,28 +1,42 @@
-; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
; Make sure dxil operation function calls for rsqrt are generated for float and half.
; CHECK-LABEL: rsqrt_float
-; CHECK: call float @dx.op.unary.f32(i32 25, float %{{.*}})
define noundef float @rsqrt_float(float noundef %a) {
entry:
- %a.addr = alloca float, align 4
- store float %a, ptr %a.addr, align 4
- %0 = load float, ptr %a.addr, align 4
- %dx.rsqrt = call float @llvm.dx.rsqrt.f32(float %0)
+; CHECK: call float @dx.op.unary.f32(i32 25, float %{{.*}})
+ %dx.rsqrt = call float @llvm.dx.rsqrt.f32(float %a)
ret float %dx.rsqrt
}
; CHECK-LABEL: rsqrt_half
-; CHECK: call half @dx.op.unary.f16(i32 25, half %{{.*}})
define noundef half @rsqrt_half(half noundef %a) {
entry:
- %a.addr = alloca half, align 2
- store half %a, ptr %a.addr, align 2
- %0 = load half, ptr %a.addr, align 2
- %dx.rsqrt = call half @llvm.dx.rsqrt.f16(half %0)
+; CHECK: call half @dx.op.unary.f16(i32 25, half %{{.*}})
+ %dx.rsqrt = call half @llvm.dx.rsqrt.f16(half %a)
ret half %dx.rsqrt
}
+define noundef <4 x float> @rsqrt_float4(<4 x float> noundef %a) #0 {
+entry:
+ ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
+ ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 25, float [[ee0]])
+ ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
+ ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 25, float [[ee1]])
+ ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
+ ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 25, float [[ee2]])
+ ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
+ ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 25, float [[ee3]])
+ ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
+ ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3
+ %2 = call <4 x float> @llvm.dx.rsqrt.v4f32(<4 x float> %a)
+ ret <4 x float> %2
+}
+
+
declare half @llvm.dx.rsqrt.f16(half)
declare float @llvm.dx.rsqrt.f32(float)
+declare <4 x float> @llvm.dx.rsqrt.v4f32(<4 x float>)
>From f74ad820f697411f646007ca9b6c0c78c76c2b6c Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <farzonlotfi at microsoft.com>
Date: Mon, 16 Sep 2024 16:21:53 -0400
Subject: [PATCH 2/2] address pr comments
---
llvm/lib/Transforms/Scalar/Scalarizer.cpp | 6 +--
llvm/test/CodeGen/DirectX/frac.ll | 48 +++++++++++++++--------
llvm/test/CodeGen/DirectX/llc-pipeline.ll | 1 +
llvm/test/CodeGen/DirectX/rsqrt.ll | 44 ++++++++++++++-------
4 files changed, 65 insertions(+), 34 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
index e4ec09aa6219ea..126cf8b66f7102 100644
--- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
@@ -699,9 +699,9 @@ bool ScalarizerVisitor::splitBinary(Instruction &I, const Splitter &Split) {
}
bool ScalarizerVisitor::isTriviallyScalarizable(Intrinsic::ID ID) {
-
- return TTI->isTargetIntrinsicTriviallyScalarizable(ID) ||
- isTriviallyVectorizable(ID);
+ if (isTriviallyVectorizable(ID))
+ return true;
+ return Function::isTargetIntrinsic(ID) && TTI->isTargetIntrinsicTriviallyScalarizable(ID);
}
/// If a call to a vector typed intrinsic function, split into a scalar call per
diff --git a/llvm/test/CodeGen/DirectX/frac.ll b/llvm/test/CodeGen/DirectX/frac.ll
index 7a2c37b9a477f7..ef24527ce837b0 100644
--- a/llvm/test/CodeGen/DirectX/frac.ll
+++ b/llvm/test/CodeGen/DirectX/frac.ll
@@ -1,39 +1,55 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
; Make sure dxil operation function calls for frac are generated for float and half.
define noundef half @frac_half(half noundef %a) {
+; CHECK-LABEL: define noundef half @frac_half(
+; CHECK-SAME: half noundef [[A:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[DX_FRAC1:%.*]] = call half @dx.op.unary.f16(i32 22, half [[A]])
+; CHECK-NEXT: ret half [[DX_FRAC1]]
+;
entry:
- ; CHECK:call half @dx.op.unary.f16(i32 22, half %{{.*}})
%dx.frac = call half @llvm.dx.frac.f16(half %a)
ret half %dx.frac
}
define noundef float @frac_float(float noundef %a) #0 {
+; CHECK-LABEL: define noundef float @frac_float(
+; CHECK-SAME: float noundef [[A:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[DX_FRAC1:%.*]] = call float @dx.op.unary.f32(i32 22, float [[A]])
+; CHECK-NEXT: ret float [[DX_FRAC1]]
+;
entry:
- ; CHECK:call float @dx.op.unary.f32(i32 22, float %{{.*}})
%dx.frac = call float @llvm.dx.frac.f32(float %a)
ret float %dx.frac
}
define noundef <4 x float> @frac_float4(<4 x float> noundef %a) #0 {
+; CHECK-LABEL: define noundef <4 x float> @frac_float4(
+; CHECK-SAME: <4 x float> noundef [[A:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[A_I0:%.*]] = extractelement <4 x float> [[A]], i64 0
+; CHECK-NEXT: [[DOTI04:%.*]] = call float @dx.op.unary.f32(i32 22, float [[A_I0]])
+; CHECK-NEXT: [[A_I1:%.*]] = extractelement <4 x float> [[A]], i64 1
+; CHECK-NEXT: [[DOTI13:%.*]] = call float @dx.op.unary.f32(i32 22, float [[A_I1]])
+; CHECK-NEXT: [[A_I2:%.*]] = extractelement <4 x float> [[A]], i64 2
+; CHECK-NEXT: [[DOTI22:%.*]] = call float @dx.op.unary.f32(i32 22, float [[A_I2]])
+; CHECK-NEXT: [[A_I3:%.*]] = extractelement <4 x float> [[A]], i64 3
+; CHECK-NEXT: [[DOTI31:%.*]] = call float @dx.op.unary.f32(i32 22, float [[A_I3]])
+; CHECK-NEXT: [[DOTUPTO0:%.*]] = insertelement <4 x float> poison, float [[DOTI04]], i64 0
+; CHECK-NEXT: [[DOTUPTO1:%.*]] = insertelement <4 x float> [[DOTUPTO0]], float [[DOTI13]], i64 1
+; CHECK-NEXT: [[DOTUPTO2:%.*]] = insertelement <4 x float> [[DOTUPTO1]], float [[DOTI22]], i64 2
+; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x float> [[DOTUPTO2]], float [[DOTI31]], i64 3
+; CHECK-NEXT: ret <4 x float> [[TMP0]]
+;
entry:
- ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
- ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 22, float [[ee0]])
- ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
- ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 22, float [[ee1]])
- ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
- ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 22, float [[ee2]])
- ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
- ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 22, float [[ee3]])
- ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
- ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
- ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
- ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3
- %2 = call <4 x float> @llvm.dx.frac.v4f32(<4 x float> %a)
+ %2 = call <4 x float> @llvm.dx.frac.v4f32(<4 x float> %a)
ret <4 x float> %2
}
declare half @llvm.dx.frac.f16(half)
declare float @llvm.dx.frac.f32(float)
-declare <4 x float> @llvm.dx.frac.v4f32(<4 x float>)
\ No newline at end of file
+declare <4 x float> @llvm.dx.frac.v4f32(<4 x float>)
diff --git a/llvm/test/CodeGen/DirectX/llc-pipeline.ll b/llvm/test/CodeGen/DirectX/llc-pipeline.ll
index 52bd891aee7d4e..46326d69175876 100644
--- a/llvm/test/CodeGen/DirectX/llc-pipeline.ll
+++ b/llvm/test/CodeGen/DirectX/llc-pipeline.ll
@@ -5,6 +5,7 @@
; CHECK-LABEL: Pass Arguments:
; CHECK-NEXT: Target Library Information
+; CHECK-NEXT: Target Transform Information
; CHECK-NEXT: ModulePass Manager
; CHECK-NEXT: DXIL Intrinsic Expansion
; CHECK-NEXT: FunctionPass Manager
diff --git a/llvm/test/CodeGen/DirectX/rsqrt.ll b/llvm/test/CodeGen/DirectX/rsqrt.ll
index d7d85d377b127b..26b22e19635af2 100644
--- a/llvm/test/CodeGen/DirectX/rsqrt.ll
+++ b/llvm/test/CodeGen/DirectX/rsqrt.ll
@@ -1,38 +1,52 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -S -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
; Make sure dxil operation function calls for rsqrt are generated for float and half.
; CHECK-LABEL: rsqrt_float
define noundef float @rsqrt_float(float noundef %a) {
+; CHECK-SAME: float noundef [[A:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[DX_RSQRT1:%.*]] = call float @dx.op.unary.f32(i32 25, float [[A]])
+; CHECK-NEXT: ret float [[DX_RSQRT1]]
+;
entry:
-; CHECK: call float @dx.op.unary.f32(i32 25, float %{{.*}})
%dx.rsqrt = call float @llvm.dx.rsqrt.f32(float %a)
ret float %dx.rsqrt
}
; CHECK-LABEL: rsqrt_half
define noundef half @rsqrt_half(half noundef %a) {
+; CHECK-SAME: half noundef [[A:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[DX_RSQRT1:%.*]] = call half @dx.op.unary.f16(i32 25, half [[A]])
+; CHECK-NEXT: ret half [[DX_RSQRT1]]
+;
entry:
-; CHECK: call half @dx.op.unary.f16(i32 25, half %{{.*}})
%dx.rsqrt = call half @llvm.dx.rsqrt.f16(half %a)
ret half %dx.rsqrt
}
define noundef <4 x float> @rsqrt_float4(<4 x float> noundef %a) #0 {
+; CHECK-LABEL: define noundef <4 x float> @rsqrt_float4(
+; CHECK-SAME: <4 x float> noundef [[A:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[A_I0:%.*]] = extractelement <4 x float> [[A]], i64 0
+; CHECK-NEXT: [[DOTI04:%.*]] = call float @dx.op.unary.f32(i32 25, float [[A_I0]])
+; CHECK-NEXT: [[A_I1:%.*]] = extractelement <4 x float> [[A]], i64 1
+; CHECK-NEXT: [[DOTI13:%.*]] = call float @dx.op.unary.f32(i32 25, float [[A_I1]])
+; CHECK-NEXT: [[A_I2:%.*]] = extractelement <4 x float> [[A]], i64 2
+; CHECK-NEXT: [[DOTI22:%.*]] = call float @dx.op.unary.f32(i32 25, float [[A_I2]])
+; CHECK-NEXT: [[A_I3:%.*]] = extractelement <4 x float> [[A]], i64 3
+; CHECK-NEXT: [[DOTI31:%.*]] = call float @dx.op.unary.f32(i32 25, float [[A_I3]])
+; CHECK-NEXT: [[DOTUPTO0:%.*]] = insertelement <4 x float> poison, float [[DOTI04]], i64 0
+; CHECK-NEXT: [[DOTUPTO1:%.*]] = insertelement <4 x float> [[DOTUPTO0]], float [[DOTI13]], i64 1
+; CHECK-NEXT: [[DOTUPTO2:%.*]] = insertelement <4 x float> [[DOTUPTO1]], float [[DOTI22]], i64 2
+; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x float> [[DOTUPTO2]], float [[DOTI31]], i64 3
+; CHECK-NEXT: ret <4 x float> [[TMP0]]
+;
entry:
- ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0
- ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 25, float [[ee0]])
- ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1
- ; CHECK: [[ie1:%.*]] = call float @dx.op.unary.f32(i32 25, float [[ee1]])
- ; CHECK: [[ee2:%.*]] = extractelement <4 x float> %a, i64 2
- ; CHECK: [[ie2:%.*]] = call float @dx.op.unary.f32(i32 25, float [[ee2]])
- ; CHECK: [[ee3:%.*]] = extractelement <4 x float> %a, i64 3
- ; CHECK: [[ie3:%.*]] = call float @dx.op.unary.f32(i32 25, float [[ee3]])
- ; CHECK: insertelement <4 x float> poison, float [[ie0]], i64 0
- ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie1]], i64 1
- ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie2]], i64 2
- ; CHECK: insertelement <4 x float> %{{.*}}, float [[ie3]], i64 3
- %2 = call <4 x float> @llvm.dx.rsqrt.v4f32(<4 x float> %a)
+ %2 = call <4 x float> @llvm.dx.rsqrt.v4f32(<4 x float> %a)
ret <4 x float> %2
}
More information about the llvm-commits
mailing list