[llvm] 47b3b76 - Implement inlining of strictfp functions

Thu Mar 31 05:16:39 PDT 2022

Author: Serge Pavlov
Date: 2022-03-31T19:15:52+07:00
New Revision: 47b3b76825dc89d4ee37408f26b458f61f86fbf5

URL: https://github.com/llvm/llvm-project/commit/47b3b76825dc89d4ee37408f26b458f61f86fbf5
DIFF: https://github.com/llvm/llvm-project/commit/47b3b76825dc89d4ee37408f26b458f61f86fbf5.diff

LOG: Implement inlining of strictfp functions

According to the current design, if a floating point operation is
represented by a constrained intrinsic somewhere in a function, all
floating point operations in the function must be represented by
constrained intrinsics. This imposes additional requirements on the
inlining mechanism. If a non-strictfp function is inlined into a strictfp
function, all ordinary FP operations must be replaced with their
constrained counterparts.

Inlining a strictfp function into a non-strictfp function is not
implemented, as it would require replacing all FP operations in the host
function, which is currently undesirable due to the expected performance loss.

Differential Revision: https://reviews.llvm.org/D69798

Added: 
    llvm/test/Transforms/Inline/inline-strictfp.ll

Modified: 
    llvm/lib/Transforms/Utils/CloneFunction.cpp
    llvm/lib/Transforms/Utils/InlineFunction.cpp
    llvm/test/Transforms/CodeExtractor/PartialInlineAttributes.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp b/llvm/lib/Transforms/Utils/CloneFunction.cpp
index db252b3a0edf4..91b28d9d826c1 100644

--- a/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -322,6 +322,9 @@ struct PruningFunctionCloner {
   bool ModuleLevelChanges;
   const char *NameSuffix;
   ClonedCodeInfo *CodeInfo;
+  bool HostFuncIsStrictFP;
+
+  Instruction *cloneInstruction(BasicBlock::const_iterator II);
 
 public:
   PruningFunctionCloner(Function *newFunc, const Function *oldFunc,
@@ -329,7 +332,10 @@ struct PruningFunctionCloner {
                         const char *nameSuffix, ClonedCodeInfo *codeInfo)
       : NewFunc(newFunc), OldFunc(oldFunc), VMap(valueMap),
         ModuleLevelChanges(moduleLevelChanges), NameSuffix(nameSuffix),
-        CodeInfo(codeInfo) {}
+        CodeInfo(codeInfo) {
+    HostFuncIsStrictFP =
+        newFunc->getAttributes().hasFnAttr(Attribute::StrictFP);
+  }
 
   /// The specified block is found to be reachable, clone it and
   /// anything that it can reach.
@@ -338,6 +344,89 @@ struct PruningFunctionCloner {
 };
 } // namespace
 
+static bool hasRoundingModeOperand(Intrinsic::ID CIID) { // True when CIID's ROUND_MODE entry is 1.
+  switch (CIID) { // The cases below are generated from ConstrainedOps.def.
+#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC)                         \
+  case Intrinsic::INTRINSIC:                                                   \
+    return ROUND_MODE == 1;
+#define FUNCTION INSTRUCTION
+#include "llvm/IR/ConstrainedOps.def"
+  default: // CIID matched none of the generated cases.
+    llvm_unreachable("Unexpected constrained intrinsic id");
+  }
+}
+
+Instruction *
+PruningFunctionCloner::cloneInstruction(BasicBlock::const_iterator II) {
+  const Instruction &OldInst = *II;
+  Instruction *NewInst = nullptr;
+  if (HostFuncIsStrictFP) { // Host is strictfp: try a constrained intrinsic.
+    Intrinsic::ID CIID = getConstrainedIntrinsicID(OldInst);
+    if (CIID != Intrinsic::not_intrinsic) {
+      // Instead of cloning the instruction, create a call to the matching
+      // constrained intrinsic.
+      // Assume the first arguments of the constrained intrinsic are the same
+      // as the operands of the original instruction.
+
+      // Determine the overloaded types of the intrinsic.
+      SmallVector<Type *, 2> TParams;
+      SmallVector<Intrinsic::IITDescriptor, 8> Descriptor;
+      getIntrinsicInfoTableEntries(CIID, Descriptor);
+      for (unsigned I = 0, E = Descriptor.size(); I != E; ++I) {
+        Intrinsic::IITDescriptor Operand = Descriptor[I];
+        switch (Operand.Kind) {
+        case Intrinsic::IITDescriptor::Argument:
+          if (Operand.getArgumentKind() !=
+              Intrinsic::IITDescriptor::AK_MatchType) {
+            if (I == 0) // Entry 0 takes its type from the result.
+              TParams.push_back(OldInst.getType());
+            else // Entry I takes its type from operand I - 1.
+              TParams.push_back(OldInst.getOperand(I - 1)->getType());
+          }
+          break;
+        case Intrinsic::IITDescriptor::SameVecWidthArgument:
+          ++I; // Also skip the descriptor entry that follows this one.
+          break;
+        default:
+          break;
+        }
+      }
+
+      // Create the intrinsic call.
+      LLVMContext &Ctx = NewFunc->getContext();
+      Function *IFn =
+          Intrinsic::getDeclaration(NewFunc->getParent(), CIID, TParams);
+      SmallVector<Value *, 4> Args;
+      unsigned NumOperands = OldInst.getNumOperands();
+      if (isa<CallInst>(OldInst))
+        --NumOperands; // Drop the call's last operand (presumably the callee).
+      for (unsigned I = 0; I < NumOperands; ++I) {
+        Value *Op = OldInst.getOperand(I);
+        Args.push_back(Op);
+      }
+      if (const auto *CmpI = dyn_cast<FCmpInst>(&OldInst)) {
+        FCmpInst::Predicate Pred = CmpI->getPredicate();
+        StringRef PredName = FCmpInst::getPredicateName(Pred);
+        Args.push_back(MetadataAsValue::get(Ctx, MDString::get(Ctx, PredName)));
+      }
+
+      // The last arguments of a constrained intrinsic are metadata that
+      // represent the rounding mode (absent in some intrinsics) and exception
+      // behavior. The inlined function uses the default settings.
+      if (hasRoundingModeOperand(CIID))
+        Args.push_back(
+            MetadataAsValue::get(Ctx, MDString::get(Ctx, "round.tonearest")));
+      Args.push_back(
+          MetadataAsValue::get(Ctx, MDString::get(Ctx, "fpexcept.ignore")));
+
+      NewInst = CallInst::Create(IFn, Args, OldInst.getName() + ".strict");
+    }
+  }
+  if (!NewInst)
+    NewInst = II->clone(); // Plain copy when no constrained call was built.
+  return NewInst;
+}
+
 /// The specified block is found to be reachable, clone it and
 /// anything that it can reach.
 void PruningFunctionCloner::CloneBlock(
@@ -377,7 +466,14 @@ void PruningFunctionCloner::CloneBlock(
   for (BasicBlock::const_iterator II = StartingInst, IE = --BB->end(); II != IE;
        ++II) {
 
-    Instruction *NewInst = II->clone();
+    Instruction *NewInst = cloneInstruction(II);
+
+    if (HostFuncIsStrictFP) {
+      // All function calls in the inlined function must get 'strictfp'
+      // attribute to prevent undesirable optimizations.
+      if (auto *Call = dyn_cast<CallInst>(NewInst))
+        Call->addFnAttr(Attribute::StrictFP);
+    }
 
     // Eagerly remap operands to the newly cloned instruction, except for PHI
     // nodes for which we defer processing until we update the CFG.

diff  --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
index e0ae69ada9376..a8616968c15ad 100644
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -1788,6 +1788,13 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
   BasicBlock *OrigBB = CB.getParent();
   Function *Caller = OrigBB->getParent();
 
+  // Do not inline strictfp function into non-strictfp one. It would require
+  // conversion of all FP operations in host function to constrained intrinsics.
+  if (CalledFunc->getAttributes().hasFnAttr(Attribute::StrictFP) &&
+      !Caller->getAttributes().hasFnAttr(Attribute::StrictFP)) {
+    return InlineResult::failure("incompatible strictfp attributes");
+  }
+
   // GC poses two hazards to inlining, which only occur when the callee has GC:
   //  1. If the caller has no GC, then the callee's GC must be propagated to the
   //     caller.

diff  --git a/llvm/test/Transforms/CodeExtractor/PartialInlineAttributes.ll b/llvm/test/Transforms/CodeExtractor/PartialInlineAttributes.ll
index 8725ee69c940f..3ce0520fc456d 100644
--- a/llvm/test/Transforms/CodeExtractor/PartialInlineAttributes.ll
+++ b/llvm/test/Transforms/CodeExtractor/PartialInlineAttributes.ll
@@ -73,11 +73,11 @@ entry:
 attributes #0 = {
   inlinehint minsize noduplicate noimplicitfloat norecurse noredzone nounwind
   nonlazybind optsize safestack sanitize_address sanitize_hwaddress sanitize_memory
-  sanitize_thread ssp sspreq sspstrong strictfp uwtable "foo"="bar"
+  sanitize_thread ssp sspreq sspstrong uwtable "foo"="bar"
   "patchable-function"="prologue-short-redirect" "probe-stack"="_foo_guard" "stack-probe-size"="4096" }
 
 ; CHECK: attributes [[FN_ATTRS0]] = { ssp
-; CHECK: attributes [[FN_ATTRS]] = { inlinehint minsize noduplicate noimplicitfloat norecurse noredzone nounwind nonlazybind optsize safestack sanitize_address sanitize_hwaddress sanitize_memory sanitize_thread ssp sspreq sspstrong strictfp uwtable "foo"="bar" "patchable-function"="prologue-short-redirect" "probe-stack"="_foo_guard" "stack-probe-size"="4096" }
+; CHECK: attributes [[FN_ATTRS]] = { inlinehint minsize noduplicate noimplicitfloat norecurse noredzone nounwind nonlazybind optsize safestack sanitize_address sanitize_hwaddress sanitize_memory sanitize_thread ssp sspreq sspstrong uwtable "foo"="bar" "patchable-function"="prologue-short-redirect" "probe-stack"="_foo_guard" "stack-probe-size"="4096" }
 
 ; attributes to drop
 attributes #1 = {

diff  --git a/llvm/test/Transforms/Inline/inline-strictfp.ll b/llvm/test/Transforms/Inline/inline-strictfp.ll
new file mode 100644
index 0000000000000..3028409fc167e
--- /dev/null
+++ b/llvm/test/Transforms/Inline/inline-strictfp.ll
@@ -0,0 +1,145 @@
+; RUN: opt -inline %s -S | FileCheck %s
+
+
+; Ordinary function is inlined into strictfp function.
+
+define float @inlined_01(float %a) {
+entry:
+  %add = fadd float %a, %a
+  ret float %add
+}
+
+define float @host_02(float %a) #0 {
+entry:
+  %0 = call float @inlined_01(float %a) #0
+  %add = call float @llvm.experimental.constrained.fadd.f32(float %0, float 2.000000e+00, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+  ret float %add
+; CHECK-LABEL: @host_02
+; CHECK: call float @llvm.experimental.constrained.fadd.f32(float {{.*}}, float {{.*}}, metadata !"round.tonearest", metadata !"fpexcept.ignore") #0
+; CHECK: call float @llvm.experimental.constrained.fadd.f32(float {{.*}}, float 2.000000e+00, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+}
+
+
+; strictfp function is inlined into another strictfp function.
+
+define float @inlined_03(float %a) #0 {
+entry:
+  %add = call float @llvm.experimental.constrained.fadd.f32(float %a, float %a, metadata !"round.downward", metadata !"fpexcept.maytrap") #0
+  ret float %add
+}
+
+define float @host_04(float %a) #0 {
+entry:
+  %0 = call float @inlined_03(float %a) #0
+  %add = call float @llvm.experimental.constrained.fadd.f32(float %0, float 2.000000e+00, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+  ret float %add
+; CHECK-LABEL: @host_04
+; CHECK: call float @llvm.experimental.constrained.fadd.f32(float {{.*}}, float {{.*}}, metadata !"round.downward", metadata !"fpexcept.maytrap") #0
+; CHECK: call float @llvm.experimental.constrained.fadd.f32(float {{.*}}, float 2.000000e+00, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+}
+
+
+; strictfp function is NOT inlined into ordinary function.
+
+define float @inlined_05(float %a) strictfp {
+entry:
+  %add = call float @llvm.experimental.constrained.fadd.f32(float %a, float %a, metadata !"round.downward", metadata !"fpexcept.maytrap") #0
+  ret float %add
+}
+
+define float @host_06(float %a) {
+entry:
+  %0 = call float @inlined_05(float %a)
+  %add = fadd float %0, 2.000000e+00
+  ret float %add
+; CHECK-LABEL: @host_06
+; CHECK: call float @inlined_05(float %a)
+; CHECK: fadd float %0, 2.000000e+00
+}
+
+
+; Calls in inlined function must get strictfp attribute.
+
+declare float @func_ext(float);
+
+define float @inlined_07(float %a) {
+entry:
+  %0 = call float @func_ext(float %a)
+  %add = fadd float %0, %a
+
+  ret float %add
+}
+
+define float @host_08(float %a) #0 {
+entry:
+  %0 = call float @inlined_07(float %a) #0
+  %add = call float @llvm.experimental.constrained.fadd.f32(float %0, float 2.000000e+00, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+  ret float %add
+; CHECK-LABEL: @host_08
+; CHECK: call float @func_ext(float {{.*}}) #0
+; CHECK: call float @llvm.experimental.constrained.fadd.f32(float {{.*}}, float {{.*}}, metadata !"round.tonearest", metadata !"fpexcept.ignore") #0
+; CHECK: call float @llvm.experimental.constrained.fadd.f32(float {{.*}}, float 2.000000e+00, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+}
+
+
+; Cloning particular instructions.
+
+; fpext has two overloaded types.
+define double @inlined_09(float %a) {
+entry:
+  %t = fpext float %a to double
+  ret double %t
+}
+
+define double @host_10(float %a) #0 {
+entry:
+  %0 = call double @inlined_09(float %a) #0
+  %add = call double @llvm.experimental.constrained.fadd.f64(double %0, double 2.000000e+00, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+  ret double %add
+; CHECK-LABEL: @host_10
+; CHECK: call double @llvm.experimental.constrained.fpext.f64.f32(float {{.*}}, metadata !"fpexcept.ignore") #0
+; CHECK: call double @llvm.experimental.constrained.fadd.f64(double {{.*}}, double 2.000000e+00, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+}
+
+; fcmp does not depend on rounding mode and has metadata argument.
+define i1 @inlined_11(float %a, float %b) {
+entry:
+  %t = fcmp oeq float %a, %b
+  ret i1 %t
+}
+
+define i1 @host_12(float %a, float %b) #0 {
+entry:
+  %add = call float @llvm.experimental.constrained.fadd.f32(float %a, float %b, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+  %cmp = call i1 @inlined_11(float %a, float %b) #0
+  ret i1 %cmp
+; CHECK-LABEL: @host_12
+; CHECK: call float @llvm.experimental.constrained.fadd.f32(float %a, float %b, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+; CHECK: call i1 @llvm.experimental.constrained.fcmp.f32(float {{.*}}, metadata !"oeq", metadata !"fpexcept.ignore") #0
+}
+
+; Intrinsic 'ceil' has constrained variant.
+define float @inlined_13(float %a) {
+entry:
+  %t = call float @llvm.ceil.f32(float %a)
+  ret float %t
+}
+
+define float @host_14(float %a) #0 {
+entry:
+  %0 = call float @inlined_13(float %a) #0
+  %add = call float @llvm.experimental.constrained.fadd.f32(float %0, float 2.000000e+00, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+  ret float %add
+; CHECK-LABEL: @host_14
+; CHECK: call float @llvm.experimental.constrained.ceil.f32(float %a, metadata !"fpexcept.ignore") #0
+; CHECK: call float @llvm.experimental.constrained.fadd.f32(float {{.*}}, float 2.000000e+00, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+}
+
+attributes #0 = { strictfp }
+
+declare float  @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
+declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata)
+declare double @llvm.experimental.constrained.fpext.f64.f32(float, metadata)
+declare i1     @llvm.experimental.constrained.fcmp.f32(float, float, metadata, metadata)
+declare float  @llvm.experimental.constrained.ceil.f32(float, metadata)
+declare float  @llvm.ceil.f32(float)