[llvm] [LLVM][InstCombine][AArch64] Refactor common SVE intrinsic combines. (PR #126928)
Kerry McLaughlin via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 20 04:12:38 PDT 2025
================
@@ -994,6 +994,509 @@ static std::optional<Instruction *> processPhiNode(InstCombiner &IC,
return IC.replaceInstUsesWith(II, NPN);
}
+// A collection of properties common to SVE intrinsics that allow for combines
+// to be written without needing to know the specific intrinsic.
+struct SVEIntrinsicInfo {
+  //
+  // Helper routines for common intrinsic definitions.
+  //
+
+  // e.g. llvm.aarch64.sve.add pg, op1, op2
+  //        with IID ==> llvm.aarch64.sve.add_u
+  static SVEIntrinsicInfo
+  defaultMergingOp(Intrinsic::ID IID = Intrinsic::not_intrinsic) {
+    return SVEIntrinsicInfo()
+        .setGoverningPredicateOperandIdx(0)
+        .setOperandIdxInactiveLanesTakenFrom(1)
+        .setMatchingUndefIntrinsic(IID);
+  }
+
+  // e.g. llvm.aarch64.sve.neg inactive, pg, op
+  static SVEIntrinsicInfo defaultMergingUnaryOp() {
+    return SVEIntrinsicInfo()
+        .setGoverningPredicateOperandIdx(1)
+        .setOperandIdxInactiveLanesTakenFrom(0)
+        .setOperandIdxWithNoActiveLanes(0);
+  }
+
+  // e.g. llvm.aarch64.sve.add_u pg, op1, op2
+  static SVEIntrinsicInfo defaultUndefOp() {
+    return SVEIntrinsicInfo()
+        .setGoverningPredicateOperandIdx(0)
+        .setInactiveLanesAreNotDefined();
+  }
+
+  // e.g. llvm.aarch64.sve.prf pg, ptr        (GPIndex = 0)
+  //      llvm.aarch64.sve.st1 data, pg, ptr  (GPIndex = 1)
+  static SVEIntrinsicInfo defaultVoidOp(unsigned GPIndex) {
+    return SVEIntrinsicInfo()
+        .setGoverningPredicateOperandIdx(GPIndex)
+        .setInactiveLanesAreUnused();
+  }
+
+  // e.g. llvm.aarch64.sve.cmpeq pg, op1, op2
+  //      llvm.aarch64.sve.ld1 pg, ptr
+  static SVEIntrinsicInfo defaultZeroingOp() {
+    return SVEIntrinsicInfo()
+        .setGoverningPredicateOperandIdx(0)
+        .setInactiveLanesAreUnused()
+        .setResultIsZeroInitialized();
+  }
+
+  // All properties relate to predication and thus having a governing predicate
+  // is the minimum requirement to say there is intrinsic info to act on.
+  explicit operator bool() const { return hasGoverningPredicate(); }
+
+  //
+  // Properties relating to the governing predicate.
+  //
+
+  bool hasGoverningPredicate() const {
+    return GoverningPredicateIdx != std::numeric_limits<unsigned>::max();
+  }
+
+  unsigned getGoverningPredicateOperandIdx() const {
+    assert(hasGoverningPredicate() && "Property not set!");
+    return GoverningPredicateIdx;
+  }
+
+  SVEIntrinsicInfo &setGoverningPredicateOperandIdx(unsigned Index) {
+    assert(!hasGoverningPredicate() && "Cannot set property twice!");
+    GoverningPredicateIdx = Index;
+    return *this;
+  }
+
+  //
+  // Properties relating to operations the intrinsic could be transformed into.
+  // NOTE: This does not mean such a transformation is always possible, but the
+  // knowledge makes it possible to reuse existing optimisations without needing
+  // to embed specific handling for each intrinsic. For example, instruction
+  // simplification can be used to optimise an intrinsic's active lanes.
+  //
+
+  bool hasMatchingUndefIntrinsic() const {
+    return UndefIntrinsic != Intrinsic::not_intrinsic;
+  }
+
+  Intrinsic::ID getMatchingUndefIntrinsic() const {
+    assert(hasMatchingUndefIntrinsic() && "Property not set!");
+    return UndefIntrinsic;
+  }
+
+  SVEIntrinsicInfo &setMatchingUndefIntrinsic(Intrinsic::ID IID) {
+    assert(!hasMatchingUndefIntrinsic() && "Cannot set property twice!");
+    UndefIntrinsic = IID;
+    return *this;
+  }
+
+  //
+  // Properties relating to the result of inactive lanes.
+  //
+
+  bool inactiveLanesTakenFromOperand() const {
+    return ResultLanes == InactiveLanesTakenFromOperand;
+  }
+
+  unsigned getOperandIdxInactiveLanesTakenFrom() const {
+    assert(inactiveLanesTakenFromOperand() && "Property not set!");
+    return OperandIdxForInactiveLanes;
+  }
+
+  SVEIntrinsicInfo &setOperandIdxInactiveLanesTakenFrom(unsigned Index) {
+    assert(ResultLanes == Uninitialized && "Cannot set property twice!");
+    ResultLanes = InactiveLanesTakenFromOperand;
+    OperandIdxForInactiveLanes = Index;
+    return *this;
+  }
+
+  bool inactiveLanesAreNotDefined() const {
+    return ResultLanes == InactiveLanesAreNotDefined;
+  }
+
+  SVEIntrinsicInfo &setInactiveLanesAreNotDefined() {
+    assert(ResultLanes == Uninitialized && "Cannot set property twice!");
+    ResultLanes = InactiveLanesAreNotDefined;
+    return *this;
+  }
+
+  bool inactiveLanesAreUnused() const {
+    return ResultLanes == InactiveLanesAreUnused;
+  }
+
+  SVEIntrinsicInfo &setInactiveLanesAreUnused() {
+    assert(ResultLanes == Uninitialized && "Cannot set property twice!");
+    ResultLanes = InactiveLanesAreUnused;
+    return *this;
+  }
+
+  // NOTE: Whilst not limited to only inactive lanes, the common use case is:
+  //   inactiveLanesAreZeroed =
+  //       resultIsZeroInitialized() && inactiveLanesAreUnused()
+  bool resultIsZeroInitialized() const { return ResultIsZeroInitialized; }
+
+  SVEIntrinsicInfo &setResultIsZeroInitialized() {
+    ResultIsZeroInitialized = true;
+    return *this;
+  }
+
+  //
+  // The first operand of unary merging operations is typically only used to
+  // set the result for inactive lanes. Knowing this allows us to deadcode the
+  // operand when we can prove there are no inactive lanes.
+  //
+
+  bool hasOperandWithNoActiveLanes() const {
+    return OperandIdxWithNoActiveLanes != std::numeric_limits<unsigned>::max();
+  }
+
+  unsigned getOperandIdxWithNoActiveLanes() const {
+    assert(hasOperandWithNoActiveLanes() && "Property not set!");
+    return OperandIdxWithNoActiveLanes;
+  }
+
+  SVEIntrinsicInfo &setOperandIdxWithNoActiveLanes(unsigned Index) {
+    assert(!hasOperandWithNoActiveLanes() && "Cannot set property twice!");
+    OperandIdxWithNoActiveLanes = Index;
+    return *this;
+  }
+
+private:
+  // unsigned max() is the "not set" sentinel for all operand indices below.
+  unsigned GoverningPredicateIdx = std::numeric_limits<unsigned>::max();
+
+  Intrinsic::ID UndefIntrinsic = Intrinsic::not_intrinsic;
+
+  enum PredicationStyle {
+    Uninitialized,
+    InactiveLanesTakenFromOperand,
+    InactiveLanesAreNotDefined,
+    InactiveLanesAreUnused
+  } ResultLanes = Uninitialized;
+
+  bool ResultIsZeroInitialized = false;
+  unsigned OperandIdxForInactiveLanes = std::numeric_limits<unsigned>::max();
+  unsigned OperandIdxWithNoActiveLanes = std::numeric_limits<unsigned>::max();
+};
+
+static SVEIntrinsicInfo constructSVEIntrinsicInfo(IntrinsicInst &II) {
+ // Some SVE intrinsics do not use scalable vector types, but since they are
+ // not relevant from an SVEIntrinsicInfo perspective, they are also ignored.
+ if (!isa<ScalableVectorType>(II.getType()) &&
+ all_of(II.args(), [&](const Value *V) {
+ return !isa<ScalableVectorType>(V->getType());
+ }))
+ return SVEIntrinsicInfo();
+
+ Intrinsic::ID IID = II.getIntrinsicID();
+ switch (IID) {
+ default:
+ break;
+ case Intrinsic::aarch64_sve_fcvt_bf16f32_v2:
+ case Intrinsic::aarch64_sve_fcvt_f16f32:
+ case Intrinsic::aarch64_sve_fcvt_f16f64:
+ case Intrinsic::aarch64_sve_fcvt_f32f16:
+ case Intrinsic::aarch64_sve_fcvt_f32f64:
+ case Intrinsic::aarch64_sve_fcvt_f64f16:
+ case Intrinsic::aarch64_sve_fcvt_f64f32:
+ case Intrinsic::aarch64_sve_fcvtlt_f32f16:
+ case Intrinsic::aarch64_sve_fcvtlt_f64f32:
+ case Intrinsic::aarch64_sve_fcvtx_f32f64:
+ case Intrinsic::aarch64_sve_fcvtzs:
+ case Intrinsic::aarch64_sve_fcvtzs_i32f16:
+ case Intrinsic::aarch64_sve_fcvtzs_i32f64:
+ case Intrinsic::aarch64_sve_fcvtzs_i64f16:
+ case Intrinsic::aarch64_sve_fcvtzs_i64f32:
+ case Intrinsic::aarch64_sve_fcvtzu:
+ case Intrinsic::aarch64_sve_fcvtzu_i32f16:
+ case Intrinsic::aarch64_sve_fcvtzu_i32f64:
+ case Intrinsic::aarch64_sve_fcvtzu_i64f16:
+ case Intrinsic::aarch64_sve_fcvtzu_i64f32:
+ case Intrinsic::aarch64_sve_scvtf:
+ case Intrinsic::aarch64_sve_scvtf_f16i32:
+ case Intrinsic::aarch64_sve_scvtf_f16i64:
+ case Intrinsic::aarch64_sve_scvtf_f32i64:
+ case Intrinsic::aarch64_sve_scvtf_f64i32:
+ case Intrinsic::aarch64_sve_ucvtf:
+ case Intrinsic::aarch64_sve_ucvtf_f16i32:
+ case Intrinsic::aarch64_sve_ucvtf_f16i64:
+ case Intrinsic::aarch64_sve_ucvtf_f32i64:
+ case Intrinsic::aarch64_sve_ucvtf_f64i32:
+ return SVEIntrinsicInfo::defaultMergingUnaryOp();
+
+ case Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2:
+ case Intrinsic::aarch64_sve_fcvtnt_f16f32:
+ case Intrinsic::aarch64_sve_fcvtnt_f32f64:
+ case Intrinsic::aarch64_sve_fcvtxnt_f32f64:
+ return SVEIntrinsicInfo()
+ .setGoverningPredicateOperandIdx(1)
+ .setOperandIdxInactiveLanesTakenFrom(0);
+
+ case Intrinsic::aarch64_sve_fabd:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_fabd_u);
+ case Intrinsic::aarch64_sve_fadd:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_fadd_u);
+ case Intrinsic::aarch64_sve_fdiv:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_fdiv_u);
+ case Intrinsic::aarch64_sve_fmax:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_fmax_u);
+ case Intrinsic::aarch64_sve_fmaxnm:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_fmaxnm_u);
+ case Intrinsic::aarch64_sve_fmin:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_fmin_u);
+ case Intrinsic::aarch64_sve_fminnm:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_fminnm_u);
+ case Intrinsic::aarch64_sve_fmla:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_fmla_u);
+ case Intrinsic::aarch64_sve_fmls:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_fmls_u);
+ case Intrinsic::aarch64_sve_fmul:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_fmul_u);
+ case Intrinsic::aarch64_sve_fmulx:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_fmulx_u);
+ case Intrinsic::aarch64_sve_fnmla:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_fnmla_u);
+ case Intrinsic::aarch64_sve_fnmls:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_fnmls_u);
+ case Intrinsic::aarch64_sve_fsub:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_fsub_u);
+ case Intrinsic::aarch64_sve_add:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_add_u);
+ case Intrinsic::aarch64_sve_mla:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_mla_u);
+ case Intrinsic::aarch64_sve_mls:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_mls_u);
+ case Intrinsic::aarch64_sve_mul:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_mul_u);
+ case Intrinsic::aarch64_sve_sabd:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_sabd_u);
+ case Intrinsic::aarch64_sve_smax:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_smax_u);
+ case Intrinsic::aarch64_sve_smin:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_smin_u);
+ case Intrinsic::aarch64_sve_smulh:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_smulh_u);
+ case Intrinsic::aarch64_sve_sub:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_sub_u);
+ case Intrinsic::aarch64_sve_uabd:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_uabd_u);
+ case Intrinsic::aarch64_sve_umax:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_umax_u);
+ case Intrinsic::aarch64_sve_umin:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_umin_u);
+ case Intrinsic::aarch64_sve_umulh:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_umulh_u);
+ case Intrinsic::aarch64_sve_asr:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_asr_u);
+ case Intrinsic::aarch64_sve_lsl:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_lsl_u);
+ case Intrinsic::aarch64_sve_lsr:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_lsr_u);
+ case Intrinsic::aarch64_sve_and:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_and_u);
+ case Intrinsic::aarch64_sve_bic:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_bic_u);
+ case Intrinsic::aarch64_sve_eor:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_eor_u);
+ case Intrinsic::aarch64_sve_orr:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_orr_u);
+ case Intrinsic::aarch64_sve_sqsub:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_sqsub_u);
+ case Intrinsic::aarch64_sve_uqsub:
+ return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_uqsub_u);
----------------
kmclaughlin-arm wrote:
If the undef intrinsic names always end in "_u", could we do something like this here?
```suggestion
...
case Intrinsic::aarch64_sve_sqsub:
case Intrinsic::aarch64_sve_uqsub: {
Intrinsic::ID UndefID =
Intrinsic::lookupIntrinsicID(Intrinsic::getBaseName(IID).str() + ".u");
assert(UndefID != Intrinsic::not_intrinsic && "Expected Intrinsic ID");
return SVEIntrinsicInfo::defaultMergingOp(UndefID);
}
```
https://github.com/llvm/llvm-project/pull/126928
More information about the llvm-commits
mailing list