[PATCH] D113095: Combine FADD and FMUL aarch64 intrinsics to FMLA
Matt Devereau via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 3 04:39:53 PDT 2021
MattDevereau created this revision.
MattDevereau added reviewers: peterwaller-arm, paulwalker-arm, bsmith, david-arm, DavidTruby.
Herald added subscribers: dexonsmith, hiraditya, kristof.beyls.
MattDevereau requested review of this revision.
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.
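This is a refinement of the work in
https://reviews.llvm.org/D111638
Fold (fadd p a (fmul p b c)) into (fmla p a b c). The fold is only applied when the fmul has no other users and both intrinsics carry identical fast-math flags that allow contraction.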
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D113095
Files:
llvm/include/llvm/IR/Operator.h
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
Index: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -695,6 +695,37 @@
return None;
}
+/// Fold a predicated fadd whose operand 2 is a predicated fmul into a single
+/// fmla:  (fadd p a (fmul p b c)) -> (fmla p a b c).
+///
+/// The fold only fires when the fmul uses the same predicate operand as the
+/// fadd, the fmul has exactly one use, and both calls carry identical
+/// fast-math flags that include 'contract'.
+///
+/// \returns the result of replacing II's uses with the new fmla call, or None
+/// when the fold does not apply.
+static Optional<Instruction *> instCombineSVEVectorFMLA(InstCombiner &IC,
+                                                        IntrinsicInst &II) {
+  Value *P = II.getOperand(0);
+  Value *A = II.getOperand(1);
+  auto *FMul = II.getOperand(2);
+  Value *B, *C;
+  // P is bound before the match starts, so it is compared with m_Specific;
+  // m_Deferred is meant for values captured earlier in the same match().
+  if (!match(FMul, m_Intrinsic<Intrinsic::aarch64_sve_fmul>(
+                       m_Specific(P), m_Value(B), m_Value(C))))
+    return None;
+
+  // Another user of the fmul would keep it alive, so nothing would be saved.
+  if (!FMul->hasOneUse())
+    return None;
+
+  FastMathFlags FAddFlags = II.getFastMathFlags();
+  // Don't combine when the fmul and fadd flags differ, to prevent the loss of
+  // any additional important flags.
+  if (FAddFlags != cast<CallInst>(FMul)->getFastMathFlags())
+    return None;
+  // The flags are known to be equal here, so one contract check covers both.
+  if (!FAddFlags.allowContract())
+    return None;
+
+  IRBuilder<> Builder(II.getContext());
+  Builder.SetInsertPoint(&II);
+  auto *FMLA = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_fmla,
+                                       {II.getType()}, {P, A, B, C}, &II);
+  FMLA->setFastMathFlags(FAddFlags);
+  return IC.replaceInstUsesWith(II, FMLA);
+}
+
static Instruction::BinaryOps intrinsicIDToBinOpCode(unsigned Intrinsic) {
switch (Intrinsic) {
case Intrinsic::aarch64_sve_fmul:
@@ -724,6 +755,14 @@
return IC.replaceInstUsesWith(II, BinOp);
}
+/// Combine entry point for the SVE fadd intrinsic: first attempt the
+/// fadd+fmul -> fmla fold, and hand the call to instCombineSVEVectorBinOp
+/// when that fold produced no result.
+static Optional<Instruction *> instCombineSVEVectorFAdd(InstCombiner &IC,
+                                                        IntrinsicInst &II) {
+  if (auto Fused = instCombineSVEVectorFMLA(IC, II))
+    return Fused;
+  return instCombineSVEVectorBinOp(IC, II);
+}
+
static Optional<Instruction *> instCombineSVEVectorMul(InstCombiner &IC,
IntrinsicInst &II) {
auto *OpPredicate = II.getOperand(0);
@@ -901,6 +940,7 @@
case Intrinsic::aarch64_sve_fmul:
return instCombineSVEVectorMul(IC, II);
case Intrinsic::aarch64_sve_fadd:
+ return instCombineSVEVectorFAdd(IC, II);
case Intrinsic::aarch64_sve_fsub:
return instCombineSVEVectorBinOp(IC, II);
case Intrinsic::aarch64_sve_tbl:
Index: llvm/include/llvm/IR/Operator.h
===================================================================
--- llvm/include/llvm/IR/Operator.h
+++ llvm/include/llvm/IR/Operator.h
@@ -247,6 +247,9 @@
void operator|=(const FastMathFlags &OtherFlags) {
Flags |= OtherFlags.Flags;
}
+  /// Returns true when this flag set is not identical to \p OtherFlags.
+  bool operator!=(const FastMathFlags &OtherFlags) const {
+    return !(Flags == OtherFlags.Flags);
+  }
};
/// Utility class for floating point operations which can have
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D113095.384396.patch
Type: text/x-patch
Size: 2937 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20211103/e6114e53/attachment.bin>
More information about the llvm-commits
mailing list