[llvm] [InstCombine] Transform high latency, dependent FSQRT/FDIV into FMUL (PR #87474)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 20 10:50:48 PDT 2024
================
@@ -626,6 +626,88 @@ Instruction *InstCombinerImpl::foldPowiReassoc(BinaryOperator &I) {
return nullptr;
}
+// Check legality for transforming
+// x = 1.0/sqrt(a)
+// r1 = x * x;
+// r2 = a/sqrt(a);
+//
+// TO
+//
+// r1 = 1/a
+// r2 = sqrt(a)
+// x = r1 * r2
+static bool isFSqrtDivToFMulLegal(Instruction *X, ArrayRef<Instruction *> R1,
+ ArrayRef<Instruction *> R2) {
+ BasicBlock *BBx = X->getParent();
+ BasicBlock *BBr1 = R1[0]->getParent();
+ BasicBlock *BBr2 = R2[0]->getParent();
+
+ CallInst *FSqrt = cast<CallInst>(X->getOperand(1));
+ if (!FSqrt->hasAllowReassoc() || !FSqrt->hasNoNaNs() ||
+ !FSqrt->hasNoSignedZeros() || !FSqrt->hasNoInfs())
+ return false;
+
+ // We change x = 1/sqrt(a) to x = sqrt(a) * 1/a. This change isn't allowed
+ // by recip fp, as it is strictly meant to transform ops of type a/b to
+ // a * 1/b. So this can be considered an algebraic rewrite, and the reassoc
+ // flag has been used (rather abused) in the past for algebraic rewrites.
+ if (!X->hasAllowReassoc() || !X->hasAllowReciprocal() || !X->hasNoInfs())
+ return false;
+
+ // Check the constraints on instructions in R1.
+ if (any_of(R1, [BBr1](Instruction *I) {
+ // When you have multiple instructions residing in R1 and R2
+ // respectively, it's difficult to generate combinations of (R1,R2) and
+ // then check if we have the required pattern. So, for now, just be
+ // conservative.
+ return (I->getParent() != BBr1 || !I->hasAllowReassoc());
+ }))
+ return false;
+
+ // Check the constraints on instructions in R2.
+ if (any_of(R2, [BBr2](Instruction *I) {
+ // When you have multiple instructions residing in R1 and R2
+ // respectively, it's difficult to generate combinations of (R1,R2) and
+ // then check if we have the required pattern. So, for now, just be
+ // conservative.
+ return (I->getParent() != BBr2 || !I->hasAllowReassoc());
+ }))
+ return false;
+
+ // Check the constraints on X, R1 and R2 combined.
+ // fdiv instruction and one of the multiplications must reside in the same
+ // block. If not, the optimized code may execute more ops than before and
+ // this may hamper the performance.
+ return (BBx == BBr1 || BBx == BBr2);
+}
+
+static void getFSqrtDivOptPattern(Instruction *Div,
+ SmallVectorImpl<Instruction *> &R1,
+ SmallVectorImpl<Instruction *> &R2) {
+ Value *A;
+ if (match(Div, m_FDiv(m_FPOne(), m_Sqrt(m_Value(A)))) ||
+ match(Div, m_FDiv(m_SpecificFP(-1.0), m_Sqrt(m_Value(A))))) {
+ for (User *U : Div->users()) {
+ Instruction *I = dyn_cast<Instruction>(U);
+ if (!I || I->getOpcode() != Instruction::FMul)
----------------
arsenm wrote:
All users of instructions must be instructions, so you can just use `cast<>` and drop the null check.
You can also drop the opcode check; it's redundant with the `m_FMul` match below.
https://github.com/llvm/llvm-project/pull/87474
More information about the llvm-commits mailing list