[llvm] [InstCombine] Transform high latency, dependent FSQRT/FDIV into FMUL (PR #87474)

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 20 10:50:50 PDT 2024


================
@@ -1796,6 +1878,64 @@ static Instruction *foldFDivSqrtDivisor(BinaryOperator &I,
   return BinaryOperator::CreateFMulFMF(Op0, NewSqrt, &I);
 }
 
+// Change
+// X = 1/sqrt(a)
+// R1 = X * X
+// R2 = a * X
+//
+// TO
+//
+// Tmp1 = 1/a
+// Tmp2 = sqrt(a)
+// Tmp3 = Tmp1 * Tmp2
+// Replace Uses Of R1 With Tmp1
+// Replace Uses Of R2 With Tmp2
+// Replace Uses Of X With Tmp3
+static Value *convertFSqrtDivIntoFMul(CallInst *CI, Instruction *X,
+                                      ArrayRef<Instruction *> R1,
+                                      ArrayRef<Instruction *> R2, Value *SqrtOp,
+                                      InstCombiner::BuilderTy &B) {
+
+  B.SetInsertPoint(X);
+
+  // Every instance of R1 may have different fpmath metadata and fpmath flags.
+  // We try to preserve them by having a separate fdiv instruction per R1
+  // instance.
+  Instruction *Tmp1;
+  for (Instruction *I : R1) {
+    Tmp1 = cast<Instruction>(
+        B.CreateFDiv(ConstantFP::get(R1[0]->getType(), 1.0), SqrtOp));
+    Tmp1->copyMetadata(*I);
+    Tmp1->copyFastMathFlags(I);
+    I->replaceAllUsesWith(Tmp1);
+  }
+
+  // Although, by value, Tmp2 = CI, every instance of R2 may have different
+  // fpmath metadata and fpmath flags. We try to preserve them by cloning the
+  // call instruction per R2 instance.
+  CallInst *Sqrt = B.CreateUnaryIntrinsic(Intrinsic::sqrt, SqrtOp);
+  Instruction *Tmp2;
+  for (Instruction *I : R2) {
+    Tmp2 = Sqrt->clone();
+    Tmp2->insertBefore(CI);
+    Tmp2->setName("sqrt");
----------------
arsenm wrote:

Either take the name from an existing value or don't add one.

https://github.com/llvm/llvm-project/pull/87474


More information about the llvm-commits mailing list