[PATCH] D109146: [AArch64][SVE] Replace fmul and fadd LLVM IR intrinsics with fmul and fadd
Matt Devereau via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 2 04:24:36 PDT 2021
MattDevereau created this revision.
MattDevereau added reviewers: paulwalker-arm, peterwaller-arm, bsmith, david-arm, DavidTruby.
Herald added subscribers: ctetreau, psnobl, hiraditya, kristof.beyls, tschuett.
Herald added a reviewer: efriedma.
MattDevereau requested review of this revision.
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.
Replacing fmul and fadd intrinsics with fmul and fadd instructions results in
more succinct AArch64 SVE output, e.g.:
4: 65428041 fmul z1.h, p0/m, z1.h, z2.h
8: 65408020 fadd z0.h, p0/m, z0.h, z1.h
->
4: 65620020 fmla z0.h, p0/m, z1.h, z2.h
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D109146
Files:
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-fmul-idempotency.ll
Index: llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-fmul-idempotency.ll
===================================================================
--- llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-fmul-idempotency.ll
+++ llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-fmul-idempotency.ll
@@ -66,7 +66,7 @@
define <vscale x 8 x half> @non_idempotent_fmul_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) #0 {
; CHECK-LABEL: @non_idempotent_fmul_f16(
; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.dup.x.nxv8f16(half 0xH4000)
-; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fmul.nxv8f16(<vscale x 8 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[TMP1]])
+; CHECK-NEXT: [[TMP2:%.*]] = fmul <vscale x 8 x half> [[TMP1]], [[A:%.*]]
; CHECK-NEXT: ret <vscale x 8 x half> [[TMP2]]
;
%1 = call <vscale x 8 x half> @llvm.aarch64.sve.dup.x.nxv8f16(half 2.0)
@@ -77,7 +77,7 @@
define <vscale x 4 x float> @non_idempotent_fmul_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) #0 {
; CHECK-LABEL: @non_idempotent_fmul_f32(
; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.dup.x.nxv4f32(float 2.000000e+00)
-; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4f32(<vscale x 4 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[TMP1]])
+; CHECK-NEXT: [[TMP2:%.*]] = fmul <vscale x 4 x float> [[TMP1]], [[A:%.*]]
; CHECK-NEXT: ret <vscale x 4 x float> [[TMP2]]
;
%1 = call <vscale x 4 x float> @llvm.aarch64.sve.dup.x.nxv4f32(float 2.0)
@@ -88,7 +88,7 @@
define <vscale x 2 x double> @non_idempotent_fmul_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) #0 {
; CHECK-LABEL: @non_idempotent_fmul_f64(
; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.dup.x.nxv2f64(double 2.000000e+00)
-; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[TMP1]])
+; CHECK-NEXT: [[TMP2:%.*]] = fmul <vscale x 2 x double> [[TMP1]], [[A:%.*]]
; CHECK-NEXT: ret <vscale x 2 x double> [[TMP2]]
;
%1 = call <vscale x 2 x double> @llvm.aarch64.sve.dup.x.nxv2f64(double 2.0)
Index: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -715,6 +715,20 @@
return match(SplatValue, m_FPOne()) || match(SplatValue, m_One());
};
+ auto IsFMul = [](auto *I) {
+ auto *IntrI = dyn_cast<IntrinsicInst>(I);
+ if (!IntrI || IntrI->getIntrinsicID() != Intrinsic::aarch64_sve_fmul)
+ return false;
+ return true;
+ };
+
+ auto IsFAdd = [](auto *I) {
+ auto *IntrI = dyn_cast<IntrinsicInst>(I);
+ if (!IntrI || IntrI->getIntrinsicID() != Intrinsic::aarch64_sve_fadd)
+ return false;
+ return true;
+ };
+
// The OpMultiplier variable should always point to the dup (if any), so
// swap if necessary.
if (IsUnitDup(OpMultiplicand) || IsUnitDupX(OpMultiplicand))
@@ -734,6 +748,12 @@
OpMultiplicand->takeName(&II);
return IC.replaceInstUsesWith(II, OpMultiplicand);
}
+ } else if (IsFAdd(&II)) {
+ auto instr = Builder.CreateFAdd(OpMultiplicand, OpMultiplier);
+ return IC.replaceInstUsesWith(II, instr);
+ } else if (IsFMul(&II)) {
+ auto instr = Builder.CreateFMul(OpMultiplicand, OpMultiplier);
+ return IC.replaceInstUsesWith(II, instr);
}
return None;
@@ -823,6 +843,7 @@
return instCombineSVEPTest(IC, II);
case Intrinsic::aarch64_sve_mul:
case Intrinsic::aarch64_sve_fmul:
+ case Intrinsic::aarch64_sve_fadd:
return instCombineSVEVectorMul(IC, II);
case Intrinsic::aarch64_sve_tbl:
return instCombineSVETBL(IC, II);
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D109146.370226.patch
Type: text/x-patch
Size: 3967 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20210902/0d893d90/attachment.bin>
More information about the llvm-commits
mailing list