[llvm] [LLVM][InstCombine][AArch64] Refactor common SVE intrinsic combines. (PR #126928)
Paul Walker via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 20 04:52:05 PDT 2025
================
@@ -994,6 +994,509 @@ static std::optional<Instruction *> processPhiNode(InstCombiner &IC,
return IC.replaceInstUsesWith(II, NPN);
}
+// A collection of properties common to SVE intrinsics that allow for combines
+// to be written without needing to know the specific intrinsic.
+//
+// Instances are built by chaining the set*() members, each of which returns
+// *this. Every setter other than setResultIsZeroInitialized() asserts that its
+// property has not already been set, and every get*() member asserts that its
+// property has been set.
+struct SVEIntrinsicInfo {
+ //
+ // Helper routines for common intrinsic definitions.
+ //
+
+ // e.g. llvm.aarch64.sve.add pg, op1, op2
+ // with IID ==> llvm.aarch64.sve.add_u
+ // When IID is left as Intrinsic::not_intrinsic no matching undef intrinsic
+ // is recorded.
+ static SVEIntrinsicInfo
+ defaultMergingOp(Intrinsic::ID IID = Intrinsic::not_intrinsic) {
+ return SVEIntrinsicInfo()
+ .setGoverningPredicateOperandIdx(0)
+ .setOperandIdxInactiveLanesTakenFrom(1)
+ .setMatchingUndefIntrinsic(IID);
+ }
+
+ // e.g. llvm.aarch64.sve.neg inactive, pg, op
+ static SVEIntrinsicInfo defaultMergingUnaryOp() {
+ return SVEIntrinsicInfo()
+ .setGoverningPredicateOperandIdx(1)
+ .setOperandIdxInactiveLanesTakenFrom(0)
+ .setOperandIdxWithNoActiveLanes(0);
+ }
+
+ // e.g. llvm.aarch64.sve.add_u pg, op1, op2
+ static SVEIntrinsicInfo defaultUndefOp() {
+ return SVEIntrinsicInfo()
+ .setGoverningPredicateOperandIdx(0)
+ .setInactiveLanesAreNotDefined();
+ }
+
+ // e.g. llvm.aarch64.sve.prf pg, ptr (GPIndex = 0)
+ // llvm.aarch64.sve.st1 data, pg, ptr (GPIndex = 1)
+ static SVEIntrinsicInfo defaultVoidOp(unsigned GPIndex) {
+ return SVEIntrinsicInfo()
+ .setGoverningPredicateOperandIdx(GPIndex)
+ .setInactiveLanesAreUnused();
+ }
+
+ // e.g. llvm.aarch64.sve.cmpeq pg, op1, op2
+ // llvm.aarch64.sve.ld1 pg, ptr
+ static SVEIntrinsicInfo defaultZeroingOp() {
+ return SVEIntrinsicInfo()
+ .setGoverningPredicateOperandIdx(0)
+ .setInactiveLanesAreUnused()
+ .setResultIsZeroInitialized();
+ }
+
+ // All properties relate to predication and thus having a governing predicate
+ // is the minimum requirement to say there is intrinsic info to act on.
+ explicit operator bool() const { return hasGoverningPredicate(); }
+
+ //
+ // Properties relating to the governing predicate.
+ //
+
+ // True once a governing predicate operand index has been recorded.
+ bool hasGoverningPredicate() const {
+ return GoverningPredicateIdx != std::numeric_limits<unsigned>::max();
+ }
+
+ unsigned getGoverningPredicateOperandIdx() const {
+ assert(hasGoverningPredicate() && "Property not set!");
+ return GoverningPredicateIdx;
+ }
+
+ SVEIntrinsicInfo &setGoverningPredicateOperandIdx(unsigned Index) {
+ assert(!hasGoverningPredicate() && "Cannot set property twice!");
+ GoverningPredicateIdx = Index;
+ return *this;
+ }
+
+ //
+ // Properties relating to operations the intrinsic could be transformed into.
+ // NOTE: This does not mean such a transformation is always possible, but the
+ // knowledge makes it possible to reuse existing optimisations without needing
+ // to embed specific handling for each intrinsic. For example, instruction
+ // simplification can be used to optimise an intrinsic's active lanes.
+ //
+
+ bool hasMatchingUndefIntrinsic() const {
+ return UndefIntrinsic != Intrinsic::not_intrinsic;
+ }
+
+ Intrinsic::ID getMatchingUndefIntrinsic() const {
+ assert(hasMatchingUndefIntrinsic() && "Property not set!");
+ return UndefIntrinsic;
+ }
+
+ // Passing Intrinsic::not_intrinsic leaves the property unset.
+ SVEIntrinsicInfo &setMatchingUndefIntrinsic(Intrinsic::ID IID) {
+ assert(!hasMatchingUndefIntrinsic() && "Cannot set property twice!");
+ UndefIntrinsic = IID;
+ return *this;
+ }
+
+ //
+ // Properties relating to the result of inactive lanes.
+ // The three styles below share a single state (ResultLanes), so at most one
+ // of them can be set on any given instance.
+ //
+
+ bool inactiveLanesTakenFromOperand() const {
+ return ResultLanes == InactiveLanesTakenFromOperand;
+ }
+
+ unsigned getOperandIdxInactiveLanesTakenFrom() const {
+ assert(inactiveLanesTakenFromOperand() && "Property not set!");
+ return OperandIdxForInactiveLanes;
+ }
+
+ SVEIntrinsicInfo &setOperandIdxInactiveLanesTakenFrom(unsigned Index) {
+ assert(ResultLanes == Uninitialized && "Cannot set property twice!");
+ ResultLanes = InactiveLanesTakenFromOperand;
+ OperandIdxForInactiveLanes = Index;
+ return *this;
+ }
+
+ bool inactiveLanesAreNotDefined() const {
+ return ResultLanes == InactiveLanesAreNotDefined;
+ }
+
+ SVEIntrinsicInfo &setInactiveLanesAreNotDefined() {
+ assert(ResultLanes == Uninitialized && "Cannot set property twice!");
+ ResultLanes = InactiveLanesAreNotDefined;
+ return *this;
+ }
+
+ bool inactiveLanesAreUnused() const {
+ return ResultLanes == InactiveLanesAreUnused;
+ }
+
+ SVEIntrinsicInfo &setInactiveLanesAreUnused() {
+ assert(ResultLanes == Uninitialized && "Cannot set property twice!");
+ ResultLanes = InactiveLanesAreUnused;
+ return *this;
+ }
+
+ // NOTE: Whilst not limited to only inactive lanes, the common use case is:
+ // inactiveLanesAreZeroed =
+ // resultIsZeroInitialized() && inactiveLanesAreUnused()
+ bool resultIsZeroInitialized() const { return ResultIsZeroInitialized; }
+
+ // Unlike the other setters this one does not assert on repeated use.
+ SVEIntrinsicInfo &setResultIsZeroInitialized() {
+ ResultIsZeroInitialized = true;
+ return *this;
+ }
+
+ //
+ // The first operand of unary merging operations is typically only used to
+ // set the result for inactive lanes. Knowing this allows us to deadcode the
+ // operand when we can prove there are no inactive lanes.
+ //
+
+ bool hasOperandWithNoActiveLanes() const {
+ return OperandIdxWithNoActiveLanes != std::numeric_limits<unsigned>::max();
+ }
+
+ unsigned getOperandIdxWithNoActiveLanes() const {
+ assert(hasOperandWithNoActiveLanes() && "Property not set!");
+ return OperandIdxWithNoActiveLanes;
+ }
+
+ SVEIntrinsicInfo &setOperandIdxWithNoActiveLanes(unsigned Index) {
+ assert(!hasOperandWithNoActiveLanes() && "Cannot set property twice!");
+ OperandIdxWithNoActiveLanes = Index;
+ return *this;
+ }
+
+private:
+ // For the operand indices below, std::numeric_limits<unsigned>::max() is the
+ // sentinel meaning "not set".
+ unsigned GoverningPredicateIdx = std::numeric_limits<unsigned>::max();
+
+ // Intrinsic::not_intrinsic is the sentinel meaning "not set".
+ Intrinsic::ID UndefIntrinsic = Intrinsic::not_intrinsic;
+
+ // How the result's inactive lanes are described; Uninitialized until one of
+ // the corresponding setters has been called.
+ enum PredicationStyle {
+ Uninitialized,
+ InactiveLanesTakenFromOperand,
+ InactiveLanesAreNotDefined,
+ InactiveLanesAreUnused
+ } ResultLanes = Uninitialized;
+
+ bool ResultIsZeroInitialized = false;
+ unsigned OperandIdxForInactiveLanes = std::numeric_limits<unsigned>::max();
+ unsigned OperandIdxWithNoActiveLanes = std::numeric_limits<unsigned>::max();
+};
+
+static SVEIntrinsicInfo constructSVEIntrinsicInfo(IntrinsicInst &II) {
+ // Some SVE intrinsics do not use scalable vector types, but since they are
+ // not relevant from an SVEIntrinsicInfo perspective, they are also ignored.
+ if (!isa<ScalableVectorType>(II.getType()) &&
+ all_of(II.args(), [&](const Value *V) {
+ return !isa<ScalableVectorType>(V->getType());
+ }))
+ return SVEIntrinsicInfo();
+
+ Intrinsic::ID IID = II.getIntrinsicID();
+ switch (IID) {
+ default:
+ break;
+ case Intrinsic::aarch64_sve_fcvt_bf16f32_v2:
+ case Intrinsic::aarch64_sve_fcvt_f16f32:
+ case Intrinsic::aarch64_sve_fcvt_f16f64:
+ case Intrinsic::aarch64_sve_fcvt_f32f16:
+ case Intrinsic::aarch64_sve_fcvt_f32f64:
+ case Intrinsic::aarch64_sve_fcvt_f64f16:
+ case Intrinsic::aarch64_sve_fcvt_f64f32:
+ case Intrinsic::aarch64_sve_fcvtlt_f32f16:
+ case Intrinsic::aarch64_sve_fcvtlt_f64f32:
+ case Intrinsic::aarch64_sve_fcvtx_f32f64:
+ case Intrinsic::aarch64_sve_fcvtzs:
+ case Intrinsic::aarch64_sve_fcvtzs_i32f16:
+ case Intrinsic::aarch64_sve_fcvtzs_i32f64:
+ case Intrinsic::aarch64_sve_fcvtzs_i64f16:
+ case Intrinsic::aarch64_sve_fcvtzs_i64f32:
+ case Intrinsic::aarch64_sve_fcvtzu:
+ case Intrinsic::aarch64_sve_fcvtzu_i32f16:
+ case Intrinsic::aarch64_sve_fcvtzu_i32f64:
+ case Intrinsic::aarch64_sve_fcvtzu_i64f16:
+ case Intrinsic::aarch64_sve_fcvtzu_i64f32:
+ case Intrinsic::aarch64_sve_scvtf:
+ case Intrinsic::aarch64_sve_scvtf_f16i32:
+ case Intrinsic::aarch64_sve_scvtf_f16i64:
+ case Intrinsic::aarch64_sve_scvtf_f32i64:
+ case Intrinsic::aarch64_sve_scvtf_f64i32:
+ case Intrinsic::aarch64_sve_ucvtf:
+ case Intrinsic::aarch64_sve_ucvtf_f16i32:
+ case Intrinsic::aarch64_sve_ucvtf_f16i64:
+ case Intrinsic::aarch64_sve_ucvtf_f32i64:
+ case Intrinsic::aarch64_sve_ucvtf_f64i32:
+ return SVEIntrinsicInfo::defaultMergingUnaryOp();
+
+ case Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2:
+ case Intrinsic::aarch64_sve_fcvtnt_f16f32:
+ case Intrinsic::aarch64_sve_fcvtnt_f32f64:
+ case Intrinsic::aarch64_sve_fcvtxnt_f32f64:
+ return SVEIntrinsicInfo()
+ .setGoverningPredicateOperandIdx(1)
+ .setOperandIdxInactiveLanesTakenFrom(0);
----------------
paulwalker-arm wrote:
I wasn't sure there would be enough to warrant a dedicated name, but I'm happy to come up with something.
https://github.com/llvm/llvm-project/pull/126928
More information about the llvm-commits mailing list