[llvm] 47b3b76 - Implement inlining of strictfp functions
Serge Pavlov via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 31 05:16:39 PDT 2022
Author: Serge Pavlov
Date: 2022-03-31T19:15:52+07:00
New Revision: 47b3b76825dc89d4ee37408f26b458f61f86fbf5
URL: https://github.com/llvm/llvm-project/commit/47b3b76825dc89d4ee37408f26b458f61f86fbf5
DIFF: https://github.com/llvm/llvm-project/commit/47b3b76825dc89d4ee37408f26b458f61f86fbf5.diff
LOG: Implement inlining of strictfp functions
According to the current design, if a floating-point operation is
represented by a constrained intrinsic anywhere in a function, all
floating-point operations in that function must be represented by
constrained intrinsics. This imposes additional requirements on the
inlining mechanism: if a non-strictfp function is inlined into a
strictfp function, all ordinary FP operations in the inlined code must
be replaced with their constrained counterparts.
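As a rough IR sketch of that replacement (the value names here are
illustrative, not taken from the patch), an ordinary operation in the
inlined code such as

  %add = fadd float %a, %b

is cloned into the strictfp host as a call to its constrained
counterpart, using the default rounding/exception metadata chosen by
this patch and carrying the strictfp attribute (shown here as an
attribute group #0 = { strictfp }):

  %add.strict = call float @llvm.experimental.constrained.fadd.f32(float %a, float %b, metadata !"round.tonearest", metadata !"fpexcept.ignore") #0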
Inlining a strictfp function into a non-strictfp one is not
implemented, as it would require replacing all FP operations in the
host function, which is currently undesirable due to the expected
performance loss.
Differential Revision: https://reviews.llvm.org/D69798
Added:
llvm/test/Transforms/Inline/inline-strictfp.ll
Modified:
llvm/lib/Transforms/Utils/CloneFunction.cpp
llvm/lib/Transforms/Utils/InlineFunction.cpp
llvm/test/Transforms/CodeExtractor/PartialInlineAttributes.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp b/llvm/lib/Transforms/Utils/CloneFunction.cpp
index db252b3a0edf4..91b28d9d826c1 100644
--- a/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -322,6 +322,9 @@ struct PruningFunctionCloner {
bool ModuleLevelChanges;
const char *NameSuffix;
ClonedCodeInfo *CodeInfo;
+ bool HostFuncIsStrictFP;
+
+ Instruction *cloneInstruction(BasicBlock::const_iterator II);
public:
PruningFunctionCloner(Function *newFunc, const Function *oldFunc,
@@ -329,7 +332,10 @@ struct PruningFunctionCloner {
const char *nameSuffix, ClonedCodeInfo *codeInfo)
: NewFunc(newFunc), OldFunc(oldFunc), VMap(valueMap),
ModuleLevelChanges(moduleLevelChanges), NameSuffix(nameSuffix),
- CodeInfo(codeInfo) {}
+ CodeInfo(codeInfo) {
+ HostFuncIsStrictFP =
+ newFunc->getAttributes().hasFnAttr(Attribute::StrictFP);
+ }
/// The specified block is found to be reachable, clone it and
/// anything that it can reach.
@@ -338,6 +344,89 @@ struct PruningFunctionCloner {
};
} // namespace
+static bool hasRoundingModeOperand(Intrinsic::ID CIID) {
+ switch (CIID) {
+#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC) \
+ case Intrinsic::INTRINSIC: \
+ return ROUND_MODE == 1;
+#define FUNCTION INSTRUCTION
+#include "llvm/IR/ConstrainedOps.def"
+ default:
+ llvm_unreachable("Unexpected constrained intrinsic id");
+ }
+}
+
+Instruction *
+PruningFunctionCloner::cloneInstruction(BasicBlock::const_iterator II) {
+ const Instruction &OldInst = *II;
+ Instruction *NewInst = nullptr;
+ if (HostFuncIsStrictFP) {
+ Intrinsic::ID CIID = getConstrainedIntrinsicID(OldInst);
+ if (CIID != Intrinsic::not_intrinsic) {
+ // Instead of cloning the instruction, a call to a constrained intrinsic
+ // should be created.
+ // Assume the first arguments of the constrained intrinsic are the same as
+ // the operands of the original instruction.
+
+ // Determine overloaded types of the intrinsic.
+ SmallVector<Type *, 2> TParams;
+ SmallVector<Intrinsic::IITDescriptor, 8> Descriptor;
+ getIntrinsicInfoTableEntries(CIID, Descriptor);
+ for (unsigned I = 0, E = Descriptor.size(); I != E; ++I) {
+ Intrinsic::IITDescriptor Operand = Descriptor[I];
+ switch (Operand.Kind) {
+ case Intrinsic::IITDescriptor::Argument:
+ if (Operand.getArgumentKind() !=
+ Intrinsic::IITDescriptor::AK_MatchType) {
+ if (I == 0)
+ TParams.push_back(OldInst.getType());
+ else
+ TParams.push_back(OldInst.getOperand(I - 1)->getType());
+ }
+ break;
+ case Intrinsic::IITDescriptor::SameVecWidthArgument:
+ ++I;
+ break;
+ default:
+ break;
+ }
+ }
+
+ // Create intrinsic call.
+ LLVMContext &Ctx = NewFunc->getContext();
+ Function *IFn =
+ Intrinsic::getDeclaration(NewFunc->getParent(), CIID, TParams);
+ SmallVector<Value *, 4> Args;
+ unsigned NumOperands = OldInst.getNumOperands();
+ if (isa<CallInst>(OldInst))
+ --NumOperands;
+ for (unsigned I = 0; I < NumOperands; ++I) {
+ Value *Op = OldInst.getOperand(I);
+ Args.push_back(Op);
+ }
+ if (const auto *CmpI = dyn_cast<FCmpInst>(&OldInst)) {
+ FCmpInst::Predicate Pred = CmpI->getPredicate();
+ StringRef PredName = FCmpInst::getPredicateName(Pred);
+ Args.push_back(MetadataAsValue::get(Ctx, MDString::get(Ctx, PredName)));
+ }
+
+ // The last arguments of a constrained intrinsic are metadata that
+ // represent rounding mode (absent in some intrinsics) and exception
+ // behavior. The inlined function uses default settings.
+ if (hasRoundingModeOperand(CIID))
+ Args.push_back(
+ MetadataAsValue::get(Ctx, MDString::get(Ctx, "round.tonearest")));
+ Args.push_back(
+ MetadataAsValue::get(Ctx, MDString::get(Ctx, "fpexcept.ignore")));
+
+ NewInst = CallInst::Create(IFn, Args, OldInst.getName() + ".strict");
+ }
+ }
+ if (!NewInst)
+ NewInst = II->clone();
+ return NewInst;
+}
+
/// The specified block is found to be reachable, clone it and
/// anything that it can reach.
void PruningFunctionCloner::CloneBlock(
@@ -377,7 +466,14 @@ void PruningFunctionCloner::CloneBlock(
for (BasicBlock::const_iterator II = StartingInst, IE = --BB->end(); II != IE;
++II) {
- Instruction *NewInst = II->clone();
+ Instruction *NewInst = cloneInstruction(II);
+
+ if (HostFuncIsStrictFP) {
+ // All function calls in the inlined function must get the 'strictfp'
+ // attribute to prevent undesirable optimizations.
+ if (auto *Call = dyn_cast<CallInst>(NewInst))
+ Call->addFnAttr(Attribute::StrictFP);
+ }
// Eagerly remap operands to the newly cloned instruction, except for PHI
// nodes for which we defer processing until we update the CFG.
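To make the new cloneInstruction path more concrete, here is a rough IR
sketch of two of the less obvious cases it handles, loosely mirroring
the tests added below (value names are illustrative):

  ; fpext has two overloaded types and no rounding-mode operand:
  ;   %t = fpext float %a to double
  ; becomes
  ;   %t.strict = call double @llvm.experimental.constrained.fpext.f64.f32(float %a, metadata !"fpexcept.ignore") #0
  ;
  ; fcmp has no rounding-mode operand and passes its predicate as metadata:
  ;   %c = fcmp oeq float %a, %b
  ; becomes
  ;   %c.strict = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"oeq", metadata !"fpexcept.ignore") #0
  ;
  ; where attributes #0 = { strictfp } is the attribute added to every cloned call.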
diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
index e0ae69ada9376..a8616968c15ad 100644
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -1788,6 +1788,13 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
BasicBlock *OrigBB = CB.getParent();
Function *Caller = OrigBB->getParent();
+ // Do not inline a strictfp function into a non-strictfp one. It would require
+ // converting all FP operations in the host function to constrained intrinsics.
+ if (CalledFunc->getAttributes().hasFnAttr(Attribute::StrictFP) &&
+ !Caller->getAttributes().hasFnAttr(Attribute::StrictFP)) {
+ return InlineResult::failure("incompatible strictfp attributes");
+ }
+
// GC poses two hazards to inlining, which only occur when the callee has GC:
// 1. If the caller has no GC, then the callee's GC must be propagated to the
// caller.
diff --git a/llvm/test/Transforms/CodeExtractor/PartialInlineAttributes.ll b/llvm/test/Transforms/CodeExtractor/PartialInlineAttributes.ll
index 8725ee69c940f..3ce0520fc456d 100644
--- a/llvm/test/Transforms/CodeExtractor/PartialInlineAttributes.ll
+++ b/llvm/test/Transforms/CodeExtractor/PartialInlineAttributes.ll
@@ -73,11 +73,11 @@ entry:
attributes #0 = {
inlinehint minsize noduplicate noimplicitfloat norecurse noredzone nounwind
nonlazybind optsize safestack sanitize_address sanitize_hwaddress sanitize_memory
- sanitize_thread ssp sspreq sspstrong strictfp uwtable "foo"="bar"
+ sanitize_thread ssp sspreq sspstrong uwtable "foo"="bar"
"patchable-function"="prologue-short-redirect" "probe-stack"="_foo_guard" "stack-probe-size"="4096" }
; CHECK: attributes [[FN_ATTRS0]] = { ssp
-; CHECK: attributes [[FN_ATTRS]] = { inlinehint minsize noduplicate noimplicitfloat norecurse noredzone nounwind nonlazybind optsize safestack sanitize_address sanitize_hwaddress sanitize_memory sanitize_thread ssp sspreq sspstrong strictfp uwtable "foo"="bar" "patchable-function"="prologue-short-redirect" "probe-stack"="_foo_guard" "stack-probe-size"="4096" }
+; CHECK: attributes [[FN_ATTRS]] = { inlinehint minsize noduplicate noimplicitfloat norecurse noredzone nounwind nonlazybind optsize safestack sanitize_address sanitize_hwaddress sanitize_memory sanitize_thread ssp sspreq sspstrong uwtable "foo"="bar" "patchable-function"="prologue-short-redirect" "probe-stack"="_foo_guard" "stack-probe-size"="4096" }
; attributes to drop
attributes #1 = {
diff --git a/llvm/test/Transforms/Inline/inline-strictfp.ll b/llvm/test/Transforms/Inline/inline-strictfp.ll
new file mode 100644
index 0000000000000..3028409fc167e
--- /dev/null
+++ b/llvm/test/Transforms/Inline/inline-strictfp.ll
@@ -0,0 +1,145 @@
+; RUN: opt -inline %s -S | FileCheck %s
+
+
+; Ordinary function is inlined into strictfp function.
+
+define float @inlined_01(float %a) {
+entry:
+ %add = fadd float %a, %a
+ ret float %add
+}
+
+define float @host_02(float %a) #0 {
+entry:
+ %0 = call float @inlined_01(float %a) #0
+ %add = call float @llvm.experimental.constrained.fadd.f32(float %0, float 2.000000e+00, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+ ret float %add
+; CHECK-LABEL: @host_02
+; CHECK: call float @llvm.experimental.constrained.fadd.f32(float {{.*}}, float {{.*}}, metadata !"round.tonearest", metadata !"fpexcept.ignore") #0
+; CHECK: call float @llvm.experimental.constrained.fadd.f32(float {{.*}}, float 2.000000e+00, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+}
+
+
+; strictfp function is inlined into another strictfp function.
+
+define float @inlined_03(float %a) #0 {
+entry:
+ %add = call float @llvm.experimental.constrained.fadd.f32(float %a, float %a, metadata !"round.downward", metadata !"fpexcept.maytrap") #0
+ ret float %add
+}
+
+define float @host_04(float %a) #0 {
+entry:
+ %0 = call float @inlined_03(float %a) #0
+ %add = call float @llvm.experimental.constrained.fadd.f32(float %0, float 2.000000e+00, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+ ret float %add
+; CHECK-LABEL: @host_04
+; CHECK: call float @llvm.experimental.constrained.fadd.f32(float {{.*}}, float {{.*}}, metadata !"round.downward", metadata !"fpexcept.maytrap") #0
+; CHECK: call float @llvm.experimental.constrained.fadd.f32(float {{.*}}, float 2.000000e+00, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+}
+
+
+; strictfp function is NOT inlined into ordinary function.
+
+define float @inlined_05(float %a) strictfp {
+entry:
+ %add = call float @llvm.experimental.constrained.fadd.f32(float %a, float %a, metadata !"round.downward", metadata !"fpexcept.maytrap") #0
+ ret float %add
+}
+
+define float @host_06(float %a) {
+entry:
+ %0 = call float @inlined_05(float %a)
+ %add = fadd float %0, 2.000000e+00
+ ret float %add
+; CHECK-LABEL: @host_06
+; CHECK: call float @inlined_05(float %a)
+; CHECK: fadd float %0, 2.000000e+00
+}
+
+
+; Calls in inlined function must get strictfp attribute.
+
+declare float @func_ext(float);
+
+define float @inlined_07(float %a) {
+entry:
+ %0 = call float @func_ext(float %a)
+ %add = fadd float %0, %a
+
+ ret float %add
+}
+
+define float @host_08(float %a) #0 {
+entry:
+ %0 = call float @inlined_07(float %a) #0
+ %add = call float @llvm.experimental.constrained.fadd.f32(float %0, float 2.000000e+00, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+ ret float %add
+; CHECK-LABEL: @host_08
+; CHECK: call float @func_ext(float {{.*}}) #0
+; CHECK: call float @llvm.experimental.constrained.fadd.f32(float {{.*}}, float {{.*}}, metadata !"round.tonearest", metadata !"fpexcept.ignore") #0
+; CHECK: call float @llvm.experimental.constrained.fadd.f32(float {{.*}}, float 2.000000e+00, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+}
+
+
+; Cloning particular instructions.
+
+; fpext has two overloaded types.
+define double @inlined_09(float %a) {
+entry:
+ %t = fpext float %a to double
+ ret double %t
+}
+
+define double @host_10(float %a) #0 {
+entry:
+ %0 = call double @inlined_09(float %a) #0
+ %add = call double @llvm.experimental.constrained.fadd.f64(double %0, double 2.000000e+00, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+ ret double %add
+; CHECK-LABEL: @host_10
+; CHECK: call double @llvm.experimental.constrained.fpext.f64.f32(float {{.*}}, metadata !"fpexcept.ignore") #0
+; CHECK: call double @llvm.experimental.constrained.fadd.f64(double {{.*}}, double 2.000000e+00, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+}
+
+; fcmp does not depend on rounding mode and has metadata argument.
+define i1 @inlined_11(float %a, float %b) {
+entry:
+ %t = fcmp oeq float %a, %b
+ ret i1 %t
+}
+
+define i1 @host_12(float %a, float %b) #0 {
+entry:
+ %add = call float @llvm.experimental.constrained.fadd.f32(float %a, float %b, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+ %cmp = call i1 @inlined_11(float %a, float %b) #0
+ ret i1 %cmp
+; CHECK-LABEL: @host_12
+; CHECK: call float @llvm.experimental.constrained.fadd.f32(float %a, float %b, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+; CHECK: call i1 @llvm.experimental.constrained.fcmp.f32(float {{.*}}, metadata !"oeq", metadata !"fpexcept.ignore") #0
+}
+
+; Intrinsic 'ceil' has constrained variant.
+define float @inlined_13(float %a) {
+entry:
+ %t = call float @llvm.ceil.f32(float %a)
+ ret float %t
+}
+
+define float @host_14(float %a) #0 {
+entry:
+ %0 = call float @inlined_13(float %a) #0
+ %add = call float @llvm.experimental.constrained.fadd.f32(float %0, float 2.000000e+00, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+ ret float %add
+; CHECK-LABEL: @host_14
+; CHECK: call float @llvm.experimental.constrained.ceil.f32(float %a, metadata !"fpexcept.ignore") #0
+; CHECK: call float @llvm.experimental.constrained.fadd.f32(float {{.*}}, float 2.000000e+00, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+}
+
+attributes #0 = { strictfp }
+
+declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
+declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata)
+declare double @llvm.experimental.constrained.fpext.f64.f32(float, metadata)
+declare i1 @llvm.experimental.constrained.fcmp.f32(float, float, metadata, metadata)
+declare float @llvm.experimental.constrained.ceil.f32(float, metadata)
+declare float @llvm.ceil.f32(float)