[llvm] [LLVM][InstCombine][AArch64] Refactor common SVE intrinsic combines. (PR #126928)
David Sherwood via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 21 09:28:24 PST 2025
================
@@ -994,6 +994,513 @@ static std::optional<Instruction *> processPhiNode(InstCombiner &IC,
return IC.replaceInstUsesWith(II, NPN);
}
+// A collection of properties common to SVE intrinsics that allow for combines
+// to be written without needing to know the specific intrinsic.
+struct SVEIntrinsicInfo {
+ //
+ // Helper routines for common intrinsic definitions.
+ //
+
+ // e.g. llvm.aarch64.sve.add pg, op1, op2
+ // with IID ==> llvm.aarch64.sve.add_u
+ static SVEIntrinsicInfo
+ defaultMergingOp(Intrinsic::ID IID = Intrinsic::not_intrinsic) {
+ return SVEIntrinsicInfo()
+ .setGoverningPredicateOperandIdx(0)
+ .setOperandIdxInactiveLanesTakenFrom(1)
+ .setMatchingUndefIntrinsic(IID);
+ }
+
+ // e.g. llvm.aarch64.sve.neg inactive, pg, op
+ static SVEIntrinsicInfo defaultMergingUnaryOp() {
+ return SVEIntrinsicInfo()
+ .setGoverningPredicateOperandIdx(1)
+ .setOperandIdxInactiveLanesTakenFrom(0)
+ .setOperandIdxWithNoActiveLanes(0);
+ }
+
+ // e.g. llvm.aarch64.sve.add_u pg, op1, op2
+ static SVEIntrinsicInfo defaultUndefOp() {
+ return SVEIntrinsicInfo()
+ .setGoverningPredicateOperandIdx(0)
+ .setInactiveLanesAreNotDefined();
+ }
+
+ // e.g. llvm.aarch64.sve.prf pg, ptr (GPIndex = 0)
+ // llvm.aarch64.sve.st1 data, pg, ptr (GPIndex = 1)
+ static SVEIntrinsicInfo defaultVoidOp(unsigned GPIndex) {
+ return SVEIntrinsicInfo()
+ .setGoverningPredicateOperandIdx(GPIndex)
+ .setInactiveLanesAreUnused();
+ }
+
+ // e.g. llvm.aarch64.sve.cmpeq pg, op1, op2
+ // llvm.aarch64.sve.ld1 pg, ptr
+ static SVEIntrinsicInfo defaultZeroingOp() {
+ return SVEIntrinsicInfo()
+ .setGoverningPredicateOperandIdx(0)
+ .setInactiveLanesAreUnused()
+ .setResultIsZeroInitialized();
+ }
+
+ // All properties relate to predication and thus having a governing predicate
+ // is the minimum requirement to say there is intrinsic info to act on.
+ explicit operator bool() const { return hasGoverningPredicate(); }
+
+ //
+ // Properties relating to the governing predicate.
+ //
+
+ bool hasGoverningPredicate() const {
+ return GoverningPredicateIdx != std::numeric_limits<unsigned>::max();
+ }
+
+ unsigned getGoverningPredicateOperandIdx() const {
+ assert(hasGoverningPredicate() && "Property not set!");
+ return GoverningPredicateIdx;
+ }
+
+ SVEIntrinsicInfo &setGoverningPredicateOperandIdx(unsigned Index) {
+ assert(!hasGoverningPredicate() && "Cannot set property twice!");
+ GoverningPredicateIdx = Index;
+ return *this;
+ }
+
+ //
+ // Properties relating to operations the intrinsic could be transformed into.
+ // NOTE: This does not mean such a transformation is always possible, but the
+ // knowledge makes it possible to reuse existing optimisations without needing
+ // to embed specific handling for each intrinsic. For example, instruction
+ // simplification can be used to optimise an intrinsic's active lanes.
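+ //
+ // e.g. with a known all-active governing predicate,
+ //   llvm.aarch64.sve.add pg, op1, op2
+ // behaves identically to
+ //   llvm.aarch64.sve.add_u pg, op1, op2
+ // because no result lane takes its value from the inactive lanes.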
+ //
+
+ bool hasMatchingUndefIntrinsic() const {
+ return UndefIntrinsic != Intrinsic::not_intrinsic;
+ }
+
+ Intrinsic::ID getMatchingUndefIntrinsic() const {
+ assert(hasMatchingUndefIntrinsic() && "Property not set!");
+ return UndefIntrinsic;
+ }
+
+ SVEIntrinsicInfo &setMatchingUndefIntrinsic(Intrinsic::ID IID) {
+ assert(!hasMatchingUndefIntrinsic() && "Cannot set property twice!");
+ UndefIntrinsic = IID;
+ return *this;
+ }
+
+ //
+ // Properties relating to the result of inactive lanes.
+ //
+
+ bool inactiveLanesTakenFromOperand() const {
+ return ResultLanes == InactiveLanesTakenFromOperand;
+ }
+
+ unsigned getOperandIdxInactiveLanesTakenFrom() const {
+ assert(inactiveLanesTakenFromOperand() && "Property not set!");
+ return OperandIdxForInactiveLanes;
+ }
+
+ SVEIntrinsicInfo &setOperandIdxInactiveLanesTakenFrom(unsigned Index) {
+ assert(ResultLanes == Uninitialized && "Cannot set property twice!");
+ ResultLanes = InactiveLanesTakenFromOperand;
+ OperandIdxForInactiveLanes = Index;
+ return *this;
+ }
+
+ bool inactiveLanesAreNotDefined() const {
+ return ResultLanes == InactiveLanesAreNotDefined;
+ }
+
+ SVEIntrinsicInfo &setInactiveLanesAreNotDefined() {
+ assert(ResultLanes == Uninitialized && "Cannot set property twice!");
+ ResultLanes = InactiveLanesAreNotDefined;
+ return *this;
+ }
+
+ bool inactiveLanesAreUnused() const {
+ return ResultLanes == InactiveLanesAreUnused;
+ }
+
+ SVEIntrinsicInfo &setInactiveLanesAreUnused() {
+ assert(ResultLanes == Uninitialized && "Cannot set property twice!");
+ ResultLanes = InactiveLanesAreUnused;
+ return *this;
+ }
+
+ // NOTE: Whilst not limited to only inactive lanes, the common use case is:
+ // inactiveLanesAreZeroed =
+ // resultIsZeroInitialized() && inactiveLanesAreUnused()
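+ // e.g. the result of llvm.aarch64.sve.cmpeq is false (zero) for all inactive
+ // lanes, so when its governing predicate has no active lanes the whole
+ // result is known to be zero.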
+ bool resultIsZeroInitialized() const { return ResultIsZeroInitialized; }
+
+ SVEIntrinsicInfo &setResultIsZeroInitialized() {
+ ResultIsZeroInitialized = true;
+ return *this;
+ }
+
+ //
+ // The first operand of unary merging operations is typically only used to
+ // set the result for inactive lanes. Knowing this allows us to deadcode the
+ // operand when we can prove there are no inactive lanes.
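+ //
+ // e.g. for the merging unary operation
+ //   llvm.aarch64.sve.neg inactive, pg, op
+ // an all-active pg means no result lane is taken from 'inactive', so that
+ // operand is effectively dead.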
+ //
+
+ bool hasOperandWithNoActiveLanes() const {
+ return OperandIdxWithNoActiveLanes != std::numeric_limits<unsigned>::max();
+ }
+
+ unsigned getOperandIdxWithNoActiveLanes() const {
+ assert(hasOperandWithNoActiveLanes() && "Property not set!");
+ return OperandIdxWithNoActiveLanes;
+ }
+
+ SVEIntrinsicInfo &setOperandIdxWithNoActiveLanes(unsigned Index) {
+ assert(!hasOperandWithNoActiveLanes() && "Cannot set property twice!");
+ OperandIdxWithNoActiveLanes = Index;
+ return *this;
+ }
+
+private:
+ unsigned GoverningPredicateIdx = std::numeric_limits<unsigned>::max();
+
+ Intrinsic::ID UndefIntrinsic = Intrinsic::not_intrinsic;
+
+ enum PredicationStyle {
+ Uninitialized,
+ InactiveLanesTakenFromOperand,
+ InactiveLanesAreNotDefined,
+ InactiveLanesAreUnused
+ } ResultLanes = Uninitialized;
+
+ bool ResultIsZeroInitialized = false;
+ unsigned OperandIdxForInactiveLanes = std::numeric_limits<unsigned>::max();
+ unsigned OperandIdxWithNoActiveLanes = std::numeric_limits<unsigned>::max();
+};
+
+static SVEIntrinsicInfo constructSVEIntrinsicInfo(IntrinsicInst &II) {
+ // Some SVE intrinsics do not use scalable vector types. Such intrinsics are
+ // not relevant from an SVEIntrinsicInfo perspective and so are ignored here.
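+ // (e.g. the element counting intrinsic llvm.aarch64.sve.cntb takes an i32
+ // pattern operand and returns a scalar count, so there is no predication to
+ // reason about.)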
+ if (!isa<ScalableVectorType>(II.getType()) &&
+ all_of(II.args(), [&](const Value *V) {
+ return !isa<ScalableVectorType>(V->getType());
+ }))
+ return SVEIntrinsicInfo();
+
+ Intrinsic::ID IID = II.getIntrinsicID();
+ switch (IID) {
+ default:
+ break;
+ case Intrinsic::aarch64_sve_fcvt_bf16f32_v2:
+ case Intrinsic::aarch64_sve_fcvt_f16f32:
+ case Intrinsic::aarch64_sve_fcvt_f16f64:
+ case Intrinsic::aarch64_sve_fcvt_f32f16:
+ case Intrinsic::aarch64_sve_fcvt_f32f64:
+ case Intrinsic::aarch64_sve_fcvt_f64f16:
+ case Intrinsic::aarch64_sve_fcvt_f64f32:
+ case Intrinsic::aarch64_sve_fcvtlt_f32f16:
+ case Intrinsic::aarch64_sve_fcvtlt_f64f32:
+ case Intrinsic::aarch64_sve_fcvtx_f32f64:
+ case Intrinsic::aarch64_sve_fcvtzs:
+ case Intrinsic::aarch64_sve_fcvtzs_i32f16:
+ case Intrinsic::aarch64_sve_fcvtzs_i32f64:
+ case Intrinsic::aarch64_sve_fcvtzs_i64f16:
+ case Intrinsic::aarch64_sve_fcvtzs_i64f32:
+ case Intrinsic::aarch64_sve_fcvtzu:
+ case Intrinsic::aarch64_sve_fcvtzu_i32f16:
+ case Intrinsic::aarch64_sve_fcvtzu_i32f64:
+ case Intrinsic::aarch64_sve_fcvtzu_i64f16:
+ case Intrinsic::aarch64_sve_fcvtzu_i64f32:
+ case Intrinsic::aarch64_sve_scvtf:
+ case Intrinsic::aarch64_sve_scvtf_f16i32:
+ case Intrinsic::aarch64_sve_scvtf_f16i64:
+ case Intrinsic::aarch64_sve_scvtf_f32i64:
+ case Intrinsic::aarch64_sve_scvtf_f64i32:
+ case Intrinsic::aarch64_sve_ucvtf:
+ case Intrinsic::aarch64_sve_ucvtf_f16i32:
+ case Intrinsic::aarch64_sve_ucvtf_f16i64:
+ case Intrinsic::aarch64_sve_ucvtf_f32i64:
+ case Intrinsic::aarch64_sve_ucvtf_f64i32:
+ return SVEIntrinsicInfo::defaultMergingUnaryOp();
+
+ case Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2:
+ case Intrinsic::aarch64_sve_fcvtnt_f16f32:
+ case Intrinsic::aarch64_sve_fcvtnt_f32f64:
+ case Intrinsic::aarch64_sve_fcvtxnt_f32f64:
+ return SVEIntrinsicInfo()
+ .setGoverningPredicateOperandIdx(1)
+ .setOperandIdxInactiveLanesTakenFrom(0);
+
+ case Intrinsic::aarch64_sve_fabd:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_fabd_u);
+ case Intrinsic::aarch64_sve_fadd:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_fadd_u);
+ case Intrinsic::aarch64_sve_fdiv:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_fdiv_u);
+ case Intrinsic::aarch64_sve_fmax:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_fmax_u);
+ case Intrinsic::aarch64_sve_fmaxnm:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_fmaxnm_u);
+ case Intrinsic::aarch64_sve_fmin:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_fmin_u);
+ case Intrinsic::aarch64_sve_fminnm:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_fminnm_u);
+ case Intrinsic::aarch64_sve_fmla:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_fmla_u);
+ case Intrinsic::aarch64_sve_fmls:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_fmls_u);
+ case Intrinsic::aarch64_sve_fmul:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_fmul_u);
+ case Intrinsic::aarch64_sve_fmulx:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_fmulx_u);
+ case Intrinsic::aarch64_sve_fnmla:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_fnmla_u);
+ case Intrinsic::aarch64_sve_fnmls:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_fnmls_u);
+ case Intrinsic::aarch64_sve_fsub:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_fsub_u);
+ case Intrinsic::aarch64_sve_add:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_add_u);
+ case Intrinsic::aarch64_sve_mla:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_mla_u);
+ case Intrinsic::aarch64_sve_mls:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_mls_u);
+ case Intrinsic::aarch64_sve_mul:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_mul_u);
+ case Intrinsic::aarch64_sve_sabd:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_sabd_u);
+ case Intrinsic::aarch64_sve_smax:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_smax_u);
+ case Intrinsic::aarch64_sve_smin:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_smin_u);
+ case Intrinsic::aarch64_sve_smulh:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_smulh_u);
+ case Intrinsic::aarch64_sve_sub:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_sub_u);
+ case Intrinsic::aarch64_sve_uabd:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_uabd_u);
+ case Intrinsic::aarch64_sve_umax:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_umax_u);
+ case Intrinsic::aarch64_sve_umin:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_umin_u);
+ case Intrinsic::aarch64_sve_umulh:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_umulh_u);
+ case Intrinsic::aarch64_sve_asr:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_asr_u);
+ case Intrinsic::aarch64_sve_lsl:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_lsl_u);
+ case Intrinsic::aarch64_sve_lsr:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_lsr_u);
+ case Intrinsic::aarch64_sve_and:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_and_u);
+ case Intrinsic::aarch64_sve_bic:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_bic_u);
+ case Intrinsic::aarch64_sve_eor:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_eor_u);
+ case Intrinsic::aarch64_sve_orr:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_orr_u);
+ case Intrinsic::aarch64_sve_sqsub:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_sqsub_u);
+ case Intrinsic::aarch64_sve_uqsub:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_uqsub_u);
+
+ case Intrinsic::aarch64_sve_addqv:
+ case Intrinsic::aarch64_sve_and_z:
+ case Intrinsic::aarch64_sve_bic_z:
+ case Intrinsic::aarch64_sve_brka_z:
+ case Intrinsic::aarch64_sve_brkb_z:
+ case Intrinsic::aarch64_sve_brkn_z:
+ case Intrinsic::aarch64_sve_brkpa_z:
+ case Intrinsic::aarch64_sve_brkpb_z:
+ case Intrinsic::aarch64_sve_cntp:
+ case Intrinsic::aarch64_sve_compact:
+ case Intrinsic::aarch64_sve_eor_z:
+ case Intrinsic::aarch64_sve_eorv:
+ case Intrinsic::aarch64_sve_eorqv:
+ case Intrinsic::aarch64_sve_nand_z:
+ case Intrinsic::aarch64_sve_nor_z:
+ case Intrinsic::aarch64_sve_orn_z:
+ case Intrinsic::aarch64_sve_orr_z:
+ case Intrinsic::aarch64_sve_orv:
+ case Intrinsic::aarch64_sve_orqv:
+ case Intrinsic::aarch64_sve_pnext:
+ case Intrinsic::aarch64_sve_rdffr_z:
+ case Intrinsic::aarch64_sve_saddv:
+ case Intrinsic::aarch64_sve_uaddv:
+ case Intrinsic::aarch64_sve_umaxv:
+ case Intrinsic::aarch64_sve_umaxqv:
+ case Intrinsic::aarch64_sve_cmpeq:
+ case Intrinsic::aarch64_sve_cmpeq_wide:
+ case Intrinsic::aarch64_sve_cmpge:
+ case Intrinsic::aarch64_sve_cmpge_wide:
+ case Intrinsic::aarch64_sve_cmpgt:
+ case Intrinsic::aarch64_sve_cmpgt_wide:
+ case Intrinsic::aarch64_sve_cmphi:
+ case Intrinsic::aarch64_sve_cmphi_wide:
+ case Intrinsic::aarch64_sve_cmphs:
+ case Intrinsic::aarch64_sve_cmphs_wide:
+ case Intrinsic::aarch64_sve_cmple_wide:
+ case Intrinsic::aarch64_sve_cmplo_wide:
+ case Intrinsic::aarch64_sve_cmpls_wide:
+ case Intrinsic::aarch64_sve_cmplt_wide:
+ case Intrinsic::aarch64_sve_cmpne:
+ case Intrinsic::aarch64_sve_cmpne_wide:
+ case Intrinsic::aarch64_sve_facge:
+ case Intrinsic::aarch64_sve_facgt:
+ case Intrinsic::aarch64_sve_fcmpeq:
+ case Intrinsic::aarch64_sve_fcmpge:
+ case Intrinsic::aarch64_sve_fcmpgt:
+ case Intrinsic::aarch64_sve_fcmpne:
+ case Intrinsic::aarch64_sve_fcmpuo:
+ case Intrinsic::aarch64_sve_ld1:
+ case Intrinsic::aarch64_sve_ld1_gather:
+ case Intrinsic::aarch64_sve_ld1_gather_index:
+ case Intrinsic::aarch64_sve_ld1_gather_scalar_offset:
+ case Intrinsic::aarch64_sve_ld1_gather_sxtw:
+ case Intrinsic::aarch64_sve_ld1_gather_sxtw_index:
+ case Intrinsic::aarch64_sve_ld1_gather_uxtw:
+ case Intrinsic::aarch64_sve_ld1_gather_uxtw_index:
+ case Intrinsic::aarch64_sve_ld1q_gather_index:
+ case Intrinsic::aarch64_sve_ld1q_gather_scalar_offset:
+ case Intrinsic::aarch64_sve_ld1q_gather_vector_offset:
+ case Intrinsic::aarch64_sve_ld1ro:
+ case Intrinsic::aarch64_sve_ld1rq:
+ case Intrinsic::aarch64_sve_ld1udq:
+ case Intrinsic::aarch64_sve_ld1uwq:
+ case Intrinsic::aarch64_sve_ld2_sret:
+ case Intrinsic::aarch64_sve_ld2q_sret:
+ case Intrinsic::aarch64_sve_ld3_sret:
+ case Intrinsic::aarch64_sve_ld3q_sret:
+ case Intrinsic::aarch64_sve_ld4_sret:
+ case Intrinsic::aarch64_sve_ld4q_sret:
+ case Intrinsic::aarch64_sve_ldff1:
+ case Intrinsic::aarch64_sve_ldff1_gather:
+ case Intrinsic::aarch64_sve_ldff1_gather_index:
+ case Intrinsic::aarch64_sve_ldff1_gather_scalar_offset:
+ case Intrinsic::aarch64_sve_ldff1_gather_sxtw:
+ case Intrinsic::aarch64_sve_ldff1_gather_sxtw_index:
+ case Intrinsic::aarch64_sve_ldff1_gather_uxtw:
+ case Intrinsic::aarch64_sve_ldff1_gather_uxtw_index:
+ case Intrinsic::aarch64_sve_ldnf1:
+ case Intrinsic::aarch64_sve_ldnt1:
+ case Intrinsic::aarch64_sve_ldnt1_gather:
+ case Intrinsic::aarch64_sve_ldnt1_gather_index:
+ case Intrinsic::aarch64_sve_ldnt1_gather_scalar_offset:
+ case Intrinsic::aarch64_sve_ldnt1_gather_uxtw:
+ return SVEIntrinsicInfo::defaultZeroingOp();
+
+ case Intrinsic::aarch64_sve_prf:
+ case Intrinsic::aarch64_sve_prfb_gather_index:
+ case Intrinsic::aarch64_sve_prfb_gather_scalar_offset:
+ case Intrinsic::aarch64_sve_prfb_gather_sxtw_index:
+ case Intrinsic::aarch64_sve_prfb_gather_uxtw_index:
+ case Intrinsic::aarch64_sve_prfd_gather_index:
+ case Intrinsic::aarch64_sve_prfd_gather_scalar_offset:
+ case Intrinsic::aarch64_sve_prfd_gather_sxtw_index:
+ case Intrinsic::aarch64_sve_prfd_gather_uxtw_index:
+ case Intrinsic::aarch64_sve_prfh_gather_index:
+ case Intrinsic::aarch64_sve_prfh_gather_scalar_offset:
+ case Intrinsic::aarch64_sve_prfh_gather_sxtw_index:
+ case Intrinsic::aarch64_sve_prfh_gather_uxtw_index:
+ case Intrinsic::aarch64_sve_prfw_gather_index:
+ case Intrinsic::aarch64_sve_prfw_gather_scalar_offset:
+ case Intrinsic::aarch64_sve_prfw_gather_sxtw_index:
+ case Intrinsic::aarch64_sve_prfw_gather_uxtw_index:
+ return SVEIntrinsicInfo::defaultVoidOp(0);
+
+ case Intrinsic::aarch64_sve_st1_scatter:
+ case Intrinsic::aarch64_sve_st1_scatter_scalar_offset:
+ case Intrinsic::aarch64_sve_st1_scatter_sxtw:
+ case Intrinsic::aarch64_sve_st1_scatter_sxtw_index:
+ case Intrinsic::aarch64_sve_st1_scatter_uxtw:
+ case Intrinsic::aarch64_sve_st1_scatter_uxtw_index:
+ case Intrinsic::aarch64_sve_st1dq:
+ case Intrinsic::aarch64_sve_st1q_scatter_index:
+ case Intrinsic::aarch64_sve_st1q_scatter_scalar_offset:
+ case Intrinsic::aarch64_sve_st1q_scatter_vector_offset:
+ case Intrinsic::aarch64_sve_st1wq:
+ case Intrinsic::aarch64_sve_stnt1:
+ case Intrinsic::aarch64_sve_stnt1_scatter:
+ case Intrinsic::aarch64_sve_stnt1_scatter_index:
+ case Intrinsic::aarch64_sve_stnt1_scatter_scalar_offset:
+ case Intrinsic::aarch64_sve_stnt1_scatter_uxtw:
+ return SVEIntrinsicInfo::defaultVoidOp(1);
+ case Intrinsic::aarch64_sve_st2:
+ case Intrinsic::aarch64_sve_st2q:
+ return SVEIntrinsicInfo::defaultVoidOp(2);
+ case Intrinsic::aarch64_sve_st3:
+ case Intrinsic::aarch64_sve_st3q:
+ return SVEIntrinsicInfo::defaultVoidOp(3);
+ case Intrinsic::aarch64_sve_st4:
+ case Intrinsic::aarch64_sve_st4q:
+ return SVEIntrinsicInfo::defaultVoidOp(4);
+ }
+
+ return SVEIntrinsicInfo();
+}
+
+static bool isAllActivePredicate(Value *Pred) {
+ // Look through a convert.from.svbool(convert.to.svbool(...)) chain.
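+ // e.g. %bool = convert.to.svbool(%pg)
+ //      %pred = convert.from.svbool(%bool)
+ // Here %pred is all active whenever %pg is, provided %pred has no more lanes
+ // than %pg.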
+ Value *UncastedPred;
+ if (match(Pred, m_Intrinsic<Intrinsic::aarch64_sve_convert_from_svbool>(
+ m_Intrinsic<Intrinsic::aarch64_sve_convert_to_svbool>(
+ m_Value(UncastedPred)))))
+ // If the predicate has the same or fewer lanes than the uncasted
+ // predicate then we know the casting has no effect.
+ if (cast<ScalableVectorType>(Pred->getType())->getMinNumElements() <=
+ cast<ScalableVectorType>(UncastedPred->getType())->getMinNumElements())
+ Pred = UncastedPred;
+
+ return match(Pred, m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(
+ m_ConstantInt<AArch64SVEPredPattern::all>()));
+}
+
+// Use SVE intrinsic info to eliminate redundant operands and/or canonicalise
+// to operations with less strict inactive lane requirements.
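+// e.g. when the governing predicate is known to have no active lanes,
+//   llvm.aarch64.sve.add pg, op1, op2
+// can be replaced by op1, the operand its inactive lanes are taken from.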
+static std::optional<Instruction *>
+simplifySVEIntrinsic(InstCombiner &IC, IntrinsicInst &II,
+ const SVEIntrinsicInfo &IInfo) {
+ if (!IInfo.hasGoverningPredicate())
+ return std::nullopt;
+
+ auto *OpPredicate = II.getOperand(IInfo.getGoverningPredicateOperandIdx());
+
+ // If there are no active lanes.
+ if (match(OpPredicate, m_ZeroInt())) {
+ if (IInfo.inactiveLanesTakenFromOperand())
+ return IC.replaceInstUsesWith(
+ II, II.getOperand(IInfo.getOperandIdxInactiveLanesTakenFrom()));
+
+ if (IInfo.inactiveLanesAreUnused()) {
+ if (IInfo.resultIsZeroInitialized()) {
+ IC.replaceInstUsesWith(II, Constant::getNullValue(II.getType()));
+ // Ensure intrinsics with side effects (e.g. ldff1) are removed.
+ return IC.eraseInstFromFunction(II);
----------------
david-arm wrote:
I realise you may not have written this code, but if they do have side-effects, doesn't that mean we can't remove them given they were in the original C code? Or does this comment mean that at the C/ACLE level they are not defined to have side-effects, but if we leave the intrinsic call in the IR it will lead to unwanted (and unnecessary) side-effects that act as barriers to optimisations?
https://github.com/llvm/llvm-project/pull/126928