[llvm] Add optimisation for SVE intrinsics with no active lanes (PR #73964)
Paul Walker via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 6 09:11:38 PST 2023
================
@@ -1891,91 +1944,117 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
case Intrinsic::aarch64_sve_ptest_last:
return instCombineSVEPTest(IC, II);
case Intrinsic::aarch64_sve_fabd:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fabd_u);
+ case Intrinsic::aarch64_sve_fabd_u:
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fabd_u);
case Intrinsic::aarch64_sve_fadd:
return instCombineSVEVectorFAdd(IC, II);
case Intrinsic::aarch64_sve_fadd_u:
return instCombineSVEVectorFAddU(IC, II);
case Intrinsic::aarch64_sve_fdiv:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fdiv_u);
+ case Intrinsic::aarch64_sve_fdiv_u:
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fdiv_u);
case Intrinsic::aarch64_sve_fmax:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmax_u);
+ case Intrinsic::aarch64_sve_fmax_u:
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmax_u);
case Intrinsic::aarch64_sve_fmaxnm:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmaxnm_u);
+ case Intrinsic::aarch64_sve_fmaxnm_u:
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmaxnm_u);
case Intrinsic::aarch64_sve_fmin:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmin_u);
+ case Intrinsic::aarch64_sve_fmin_u:
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmin_u);
case Intrinsic::aarch64_sve_fminnm:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fminnm_u);
+ case Intrinsic::aarch64_sve_fminnm_u:
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fminnm_u);
case Intrinsic::aarch64_sve_fmla:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmla_u);
+ case Intrinsic::aarch64_sve_fmla_u:
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmla_u);
case Intrinsic::aarch64_sve_fmls:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmls_u);
+ case Intrinsic::aarch64_sve_fmls_u:
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmls_u);
case Intrinsic::aarch64_sve_fmul:
case Intrinsic::aarch64_sve_fmul_u:
return instCombineSVEVectorMul(IC, II, Intrinsic::aarch64_sve_fmul_u);
case Intrinsic::aarch64_sve_fmulx:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmulx_u);
+ case Intrinsic::aarch64_sve_fmulx_u:
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmulx_u);
case Intrinsic::aarch64_sve_fnmla:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fnmla_u);
+ case Intrinsic::aarch64_sve_fnmla_u:
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fnmla_u);
case Intrinsic::aarch64_sve_fnmls:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fnmls_u);
+ case Intrinsic::aarch64_sve_fnmls_u:
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fnmls_u);
case Intrinsic::aarch64_sve_fsub:
return instCombineSVEVectorFSub(IC, II);
case Intrinsic::aarch64_sve_fsub_u:
return instCombineSVEVectorFSubU(IC, II);
case Intrinsic::aarch64_sve_add:
return instCombineSVEVectorAdd(IC, II);
case Intrinsic::aarch64_sve_add_u:
- return instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_mul_u,
- Intrinsic::aarch64_sve_mla_u>(
- IC, II, true);
+ return instCombineSVEVectorAddU(IC, II);
case Intrinsic::aarch64_sve_mla:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_mla_u);
+ case Intrinsic::aarch64_sve_mla_u:
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_mla_u);
case Intrinsic::aarch64_sve_mls:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_mls_u);
+ case Intrinsic::aarch64_sve_mls_u:
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_mls_u);
case Intrinsic::aarch64_sve_mul:
case Intrinsic::aarch64_sve_mul_u:
return instCombineSVEVectorMul(IC, II, Intrinsic::aarch64_sve_mul_u);
case Intrinsic::aarch64_sve_sabd:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_sabd_u);
+ case Intrinsic::aarch64_sve_sabd_u:
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_sabd_u);
case Intrinsic::aarch64_sve_smax:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_smax_u);
+ case Intrinsic::aarch64_sve_smax_u:
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_smax_u);
case Intrinsic::aarch64_sve_smin:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_smin_u);
+ case Intrinsic::aarch64_sve_smin_u:
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_smin_u);
case Intrinsic::aarch64_sve_smulh:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_smulh_u);
+ case Intrinsic::aarch64_sve_smulh_u:
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_smulh_u);
case Intrinsic::aarch64_sve_sub:
return instCombineSVEVectorSub(IC, II);
case Intrinsic::aarch64_sve_sub_u:
- return instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_mul_u,
- Intrinsic::aarch64_sve_mls_u>(
- IC, II, true);
+ return instCombineSVEVectorSubU(IC, II);
case Intrinsic::aarch64_sve_uabd:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_uabd_u);
+ case Intrinsic::aarch64_sve_uabd_u:
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_uabd_u);
----------------
paulwalker-arm wrote:
I don't like the idea of handling the two intrinsic types together, because I recall we only recently split similar instances apart because it was awkward to implement `_u`-specific and non-`_u`-specific optimisations. You can already see this in this patch, where only half of `instCombineSVEAllOrNoActive` is applicable. I'd rather have dedicated functions for the specific combine, so by all means have `instCombineSVEAllOrNoActive` in place of `instCombineSVEAllActive`, but I think the `_u` forms should have a dedicated function.
That said, I do wonder how useful the combines that produce `undef` are in a practical sense. I would think that is a sign the source material is likely bogus rather than a legitimate optimisation opportunity.
https://github.com/llvm/llvm-project/pull/73964
More information about the llvm-commits
mailing list