[llvm] [InstCombine] Transform high latency, dependent FSQRT/FDIV into FMUL (PR #87474)
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 11 23:11:00 PST 2024
================
@@ -1864,6 +1949,64 @@ static Instruction *foldFDivSqrtDivisor(BinaryOperator &I,
return BinaryOperator::CreateFMulFMF(Op0, NewSqrt, &I);
}
+// Change
+// X = 1/sqrt(a)
+// R1 = X * X
+// R2 = a * X
+//
+// TO
+//
+// FDiv = 1/a
+// FSqrt = sqrt(a)
+// FMul = FDiv * FSqrt
+// Replace Uses Of R1 With FDiv
+// Replace Uses Of R2 With FSqrt
+// Replace Uses Of X With FMul
+static Value *convertFSqrtDivIntoFMul(CallInst *CI, Instruction *X,
+ SmallPtrSetImpl<Instruction *> &R1,
+ SmallPtrSetImpl<Instruction *> &R2,
+ InstCombiner::BuilderTy &B) {
+
+ B.SetInsertPoint(X);
+
+ // Every instance of R1 may have different fpmath metadata and fpmath flags.
+ // We try to preserve them by having separate fdiv instruction per R1
+ // instance.
+ Value *SqrtOp = CI->getArgOperand(0);
+ Instruction *FDiv;
+
+ for (Instruction *I : R1) {
+ FDiv = cast<Instruction>(
+ B.CreateFDiv(ConstantFP::get((*R1.begin())->getType(), 1.0), SqrtOp));
+ FDiv->copyMetadata(*I);
+ FDiv->copyFastMathFlags(I);
+ I->replaceAllUsesWith(FDiv);
+ }
+
+ // Although, by value, FSqrt = CI, every instance of R2 may have different
+ // fpmath metadata and fpmath flags. We try to preserve them by cloning the
+ // call instruction per R2 instance.
+ CallInst *FSqrt;
+ for (Instruction *I : R2) {
+ FSqrt = cast<CallInst>(CI->clone());
+ FSqrt->insertBefore(CI);
+ FSqrt->copyFastMathFlags(I);
+ FSqrt->copyMetadata(*I);
+ I->replaceAllUsesWith(FSqrt);
----------------
topperc wrote:
You should maybe use `InstCombiner::replaceInstUsesWith` so the users of I are added to the worklist.
https://github.com/llvm/llvm-project/pull/87474
More information about the llvm-commits
mailing list