[llvm] 2f887c9 - [InstCombine] Extend SVEVectorFuseMulAddSub to support newly added "undef" intrinsics.
Paul Walker via llvm-commits
llvm-commits at lists.llvm.org
Sun Mar 12 04:31:40 PDT 2023
Author: Paul Walker
Date: 2023-03-12T10:39:24Z
New Revision: 2f887c9a760dfdffa584ce84361912fe122ad79f
URL: https://github.com/llvm/llvm-project/commit/2f887c9a760dfdffa584ce84361912fe122ad79f
DIFF: https://github.com/llvm/llvm-project/commit/2f887c9a760dfdffa584ce84361912fe122ad79f.diff
LOG: [InstCombine] Extend SVEVectorFuseMulAddSub to support newly added "undef" intrinsics.
D143767 will change the intrinsics used to lower the floating-point
svadd_x, svmul_x and svsub_x builtins. As a result, the combines added
as part of D140200 will no longer fire in all cases. This patch extends
the existing contraction combines to also cover the fadd_u, fmul_u and
fsub_u intrinsics.
Differential Revision: https://reviews.llvm.org/D144413
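To illustrate the intent: when the multiply and the add/sub both use the
"undef" (_u) intrinsic variants and carry contraction-friendly fast-math
flags, the pair is folded into a single fmla_u (or fmls_u) call. A minimal
IR sketch, adapted from the tests added below (the predicate setup is
simplified here; the real tests first convert an svbool predicate):

  ; Before the combine: separate predicated multiply and add.
  %mul = call fast <vscale x 8 x half> @llvm.aarch64.sve.fmul.u.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
  %res = call fast <vscale x 8 x half> @llvm.aarch64.sve.fadd.u.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %c, <vscale x 8 x half> %mul)

  ; After the combine: one fused multiply-add accumulating into %c.
  %res = call fast <vscale x 8 x half> @llvm.aarch64.sve.fmla.u.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %c, <vscale x 8 x half> %a, <vscale x 8 x half> %b)

The fsub_u case maps onto fmls_u in the same way.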
Added:
Modified:
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-muladdsub.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 5cd00584db64a..270c6d13c225b 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1619,9 +1619,17 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
case Intrinsic::aarch64_sve_fadd:
case Intrinsic::aarch64_sve_add:
return instCombineSVEVectorAdd(IC, II);
+ case Intrinsic::aarch64_sve_fadd_u:
+ return instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_fmul_u,
+ Intrinsic::aarch64_sve_fmla_u>(
+ IC, II, true);
case Intrinsic::aarch64_sve_fsub:
case Intrinsic::aarch64_sve_sub:
return instCombineSVEVectorSub(IC, II);
+ case Intrinsic::aarch64_sve_fsub_u:
+ return instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_fmul_u,
+ Intrinsic::aarch64_sve_fmls_u>(
+ IC, II, true);
case Intrinsic::aarch64_sve_tbl:
return instCombineSVETBL(IC, II);
case Intrinsic::aarch64_sve_uunpkhi:
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-muladdsub.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-muladdsub.ll
index 00213f8bb17b0..18d3f0a6891c5 100644
--- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-muladdsub.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-muladdsub.ll
@@ -15,6 +15,18 @@ define <vscale x 8 x half> @combine_fmla(<vscale x 16 x i1> %p, <vscale x 8 x ha
ret <vscale x 8 x half> %3
}
+define <vscale x 8 x half> @combine_fmla_u(<vscale x 16 x i1> %p, <vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) #0 {
+; CHECK-LABEL: @combine_fmla_u(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[P:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = call fast <vscale x 8 x half> @llvm.aarch64.sve.fmla.u.nxv8f16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x half> [[C:%.*]], <vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]])
+; CHECK-NEXT: ret <vscale x 8 x half> [[TMP2]]
+;
+ %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %p)
+ %2 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmul.u.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
+ %3 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fadd.u.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %c, <vscale x 8 x half> %2)
+ ret <vscale x 8 x half> %3
+}
+
define <vscale x 16 x i8> @combine_mla_i8(<vscale x 16 x i1> %p, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) #0 {
; CHECK-LABEL: @combine_mla_i8(
; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.mla.nxv16i8(<vscale x 16 x i1> [[P:%.*]], <vscale x 16 x i8> [[C:%.*]], <vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]])
@@ -59,6 +71,18 @@ define <vscale x 8 x half> @combine_fmls(<vscale x 16 x i1> %p, <vscale x 8 x ha
ret <vscale x 8 x half> %3
}
+define <vscale x 8 x half> @combine_fmls_u(<vscale x 16 x i1> %p, <vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) #0 {
+; CHECK-LABEL: @combine_fmls_u(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[P:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = call fast <vscale x 8 x half> @llvm.aarch64.sve.fmls.u.nxv8f16(<vscale x 8 x i1> [[TMP1]], <vscale x 8 x half> [[C:%.*]], <vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]])
+; CHECK-NEXT: ret <vscale x 8 x half> [[TMP2]]
+;
+ %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %p)
+ %2 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmul.u.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
+ %3 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fsub.u.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %c, <vscale x 8 x half> %2)
+ ret <vscale x 8 x half> %3
+}
+
define <vscale x 16 x i8> @combine_mls_i8(<vscale x 16 x i1> %p, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) #0 {
; CHECK-LABEL: @combine_mls_i8(
; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.mls.nxv16i8(<vscale x 16 x i1> [[P:%.*]], <vscale x 16 x i8> [[C:%.*]], <vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]])
@@ -173,6 +197,9 @@ declare <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x
declare <vscale x 8 x half> @llvm.aarch64.sve.fmul.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 8 x half> @llvm.aarch64.sve.fadd.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 8 x half> @llvm.aarch64.sve.fsub.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 8 x half> @llvm.aarch64.sve.fmul.u.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 8 x half> @llvm.aarch64.sve.fadd.u.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 8 x half> @llvm.aarch64.sve.fsub.u.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.mul.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.add.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.sub.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
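The updated test can be run on its own from a built LLVM tree in the usual
way (its RUN line is unchanged and therefore not shown in this diff), for
example:

  llvm-lit llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-muladdsub.ll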