[llvm] [LLVM][InstCombine][AArch64] Refactor common SVE intrinsic combines. (PR #126928)

Kerry McLaughlin via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 20 04:12:38 PDT 2025


================
@@ -994,6 +994,509 @@ static std::optional<Instruction *> processPhiNode(InstCombiner &IC,
   return IC.replaceInstUsesWith(II, NPN);
 }
 
+// A collection of properties common to SVE intrinsics that allow for combines
+// to be written without needing to know the specific intrinsic.
+struct SVEIntrinsicInfo {
+  //
+  // Helper routines for common intrinsic definitions.
+  //
+
+  // e.g. llvm.aarch64.sve.add pg, op1, op2
+  //        with IID ==> llvm.aarch64.sve.add_u
+  static SVEIntrinsicInfo
+  defaultMergingOp(Intrinsic::ID IID = Intrinsic::not_intrinsic) {
+    return SVEIntrinsicInfo()
+        .setGoverningPredicateOperandIdx(0)
+        .setOperandIdxInactiveLanesTakenFrom(1)
+        .setMatchingUndefIntrinsic(IID);
+  }
+
+  // e.g. llvm.aarch64.sve.neg inactive, pg, op
+  static SVEIntrinsicInfo defaultMergingUnaryOp() {
+    return SVEIntrinsicInfo()
+        .setGoverningPredicateOperandIdx(1)
+        .setOperandIdxInactiveLanesTakenFrom(0)
+        .setOperandIdxWithNoActiveLanes(0);
+  }
+
+  // e.g. llvm.aarch64.sve.add_u pg, op1, op2
+  static SVEIntrinsicInfo defaultUndefOp() {
+    return SVEIntrinsicInfo()
+        .setGoverningPredicateOperandIdx(0)
+        .setInactiveLanesAreNotDefined();
+  }
+
+  // e.g. llvm.aarch64.sve.prf pg, ptr        (GPIndex = 0)
+  //      llvm.aarch64.sve.st1 data, pg, ptr  (GPIndex = 1)
+  static SVEIntrinsicInfo defaultVoidOp(unsigned GPIndex) {
+    return SVEIntrinsicInfo()
+        .setGoverningPredicateOperandIdx(GPIndex)
+        .setInactiveLanesAreUnused();
+  }
+
+  // e.g. llvm.aarch64.sve.cmpeq pg, op1, op2
+  //      llvm.aarch64.sve.ld1 pg, ptr
+  static SVEIntrinsicInfo defaultZeroingOp() {
+    return SVEIntrinsicInfo()
+        .setGoverningPredicateOperandIdx(0)
+        .setInactiveLanesAreUnused()
+        .setResultIsZeroInitialized();
+  }
+
+  // All properties relate to predication and thus having a general predicate
+  // is the minimum requirement to say there is intrinsic info to act on.
+  explicit operator bool() const { return hasGoverningPredicate(); }
+
+  //
+  // Properties relating to the governing predicate.
+  //
+
+  bool hasGoverningPredicate() const {
+    return GoverningPredicateIdx != std::numeric_limits<unsigned>::max();
+  }
+
+  unsigned getGoverningPredicateOperandIdx() const {
+    assert(hasGoverningPredicate() && "Propery not set!");
+    return GoverningPredicateIdx;
+  }
+
+  SVEIntrinsicInfo &setGoverningPredicateOperandIdx(unsigned Index) {
+    assert(!hasGoverningPredicate() && "Cannot set property twice!");
+    GoverningPredicateIdx = Index;
+    return *this;
+  }
+
+  //
+  // Properties relating to operations the intrinsic could be transformed into.
+  // NOTE: This does not mean such a transformation is always possible, but the
+  // knowledge makes it possible to reuse existing optimisations without needing
+  // to embed specific handling for each intrinsic. For example, instruction
+  // simplification can be used to optimise an intrinsic's active lanes.
+  //
+
+  bool hasMatchingUndefIntrinsic() const {
+    return UndefIntrinsic != Intrinsic::not_intrinsic;
+  }
+
+  Intrinsic::ID getMatchingUndefIntrinsic() const {
+    assert(hasMatchingUndefIntrinsic() && "Propery not set!");
+    return UndefIntrinsic;
+  }
+
+  SVEIntrinsicInfo &setMatchingUndefIntrinsic(Intrinsic::ID IID) {
+    assert(!hasMatchingUndefIntrinsic() && "Cannot set property twice!");
+    UndefIntrinsic = IID;
+    return *this;
+  }
+
+  //
+  // Properties relating to the result of inactive lanes.
+  //
+
+  bool inactiveLanesTakenFromOperand() const {
+    return ResultLanes == InactiveLanesTakenFromOperand;
+  }
+
+  unsigned getOperandIdxInactiveLanesTakenFrom() const {
+    assert(inactiveLanesTakenFromOperand() && "Propery not set!");
+    return OperandIdxForInactiveLanes;
+  }
+
+  SVEIntrinsicInfo &setOperandIdxInactiveLanesTakenFrom(unsigned Index) {
+    assert(ResultLanes == Uninitialized && "Cannot set property twice!");
+    ResultLanes = InactiveLanesTakenFromOperand;
+    OperandIdxForInactiveLanes = Index;
+    return *this;
+  }
+
+  bool inactiveLanesAreNotDefined() const {
+    return ResultLanes == InactiveLanesAreNotDefined;
+  }
+
+  SVEIntrinsicInfo &setInactiveLanesAreNotDefined() {
+    assert(ResultLanes == Uninitialized && "Cannot set property twice!");
+    ResultLanes = InactiveLanesAreNotDefined;
+    return *this;
+  }
+
+  bool inactiveLanesAreUnused() const {
+    return ResultLanes == InactiveLanesAreUnused;
+  }
+
+  SVEIntrinsicInfo &setInactiveLanesAreUnused() {
+    assert(ResultLanes == Uninitialized && "Cannot set property twice!");
+    ResultLanes = InactiveLanesAreUnused;
+    return *this;
+  }
+
+  // NOTE: Whilst not limited to only inactive lanes, the common use case is:
+  // inactiveLanesAreZerod =
+  //     resultIsZeroInitialized() && inactiveLanesAreUnused()
+  bool resultIsZeroInitialized() const { return ResultIsZeroInitialized; }
+
+  SVEIntrinsicInfo &setResultIsZeroInitialized() {
+    ResultIsZeroInitialized = true;
+    return *this;
+  }
+
+  //
+  // The first operand of unary merging operations is typically only used to
+  // set the result for inactive lanes. Knowing this allows us to deadcode the
+  // operand when we can prove there are no inactive lanes.
+  //
+
+  bool hasOperandWithNoActiveLanes() const {
+    return OperandIdxWithNoActiveLanes != std::numeric_limits<unsigned>::max();
+  }
+
+  unsigned getOperandIdxWithNoActiveLanes() const {
+    assert(hasOperandWithNoActiveLanes() && "Propery not set!");
+    return OperandIdxWithNoActiveLanes;
+  }
+
+  SVEIntrinsicInfo &setOperandIdxWithNoActiveLanes(unsigned Index) {
+    assert(!hasOperandWithNoActiveLanes() && "Cannot set property twice!");
+    OperandIdxWithNoActiveLanes = Index;
+    return *this;
+  }
+
+private:
+  unsigned GoverningPredicateIdx = std::numeric_limits<unsigned>::max();
+
+  Intrinsic::ID UndefIntrinsic = Intrinsic::not_intrinsic;
+
+  enum PredicationStyle {
+    Uninitialized,
+    InactiveLanesTakenFromOperand,
+    InactiveLanesAreNotDefined,
+    InactiveLanesAreUnused
+  } ResultLanes = Uninitialized;
+
+  bool ResultIsZeroInitialized = false;
+  unsigned OperandIdxForInactiveLanes = std::numeric_limits<unsigned>::max();
+  unsigned OperandIdxWithNoActiveLanes = std::numeric_limits<unsigned>::max();
+};
+
+static SVEIntrinsicInfo constructSVEIntrinsicInfo(IntrinsicInst &II) {
+  // Some SVE intrinsics do not use scalable vector types, but since they are
+  // not relevant from an SVEIntrinsicInfo perspective, they are also ignored.
+  if (!isa<ScalableVectorType>(II.getType()) &&
+      all_of(II.args(), [&](const Value *V) {
+        return !isa<ScalableVectorType>(V->getType());
+      }))
+    return SVEIntrinsicInfo();
+
+  Intrinsic::ID IID = II.getIntrinsicID();
+  switch (IID) {
+  default:
+    break;
+  case Intrinsic::aarch64_sve_fcvt_bf16f32_v2:
+  case Intrinsic::aarch64_sve_fcvt_f16f32:
+  case Intrinsic::aarch64_sve_fcvt_f16f64:
+  case Intrinsic::aarch64_sve_fcvt_f32f16:
+  case Intrinsic::aarch64_sve_fcvt_f32f64:
+  case Intrinsic::aarch64_sve_fcvt_f64f16:
+  case Intrinsic::aarch64_sve_fcvt_f64f32:
+  case Intrinsic::aarch64_sve_fcvtlt_f32f16:
+  case Intrinsic::aarch64_sve_fcvtlt_f64f32:
+  case Intrinsic::aarch64_sve_fcvtx_f32f64:
+  case Intrinsic::aarch64_sve_fcvtzs:
+  case Intrinsic::aarch64_sve_fcvtzs_i32f16:
+  case Intrinsic::aarch64_sve_fcvtzs_i32f64:
+  case Intrinsic::aarch64_sve_fcvtzs_i64f16:
+  case Intrinsic::aarch64_sve_fcvtzs_i64f32:
+  case Intrinsic::aarch64_sve_fcvtzu:
+  case Intrinsic::aarch64_sve_fcvtzu_i32f16:
+  case Intrinsic::aarch64_sve_fcvtzu_i32f64:
+  case Intrinsic::aarch64_sve_fcvtzu_i64f16:
+  case Intrinsic::aarch64_sve_fcvtzu_i64f32:
+  case Intrinsic::aarch64_sve_scvtf:
+  case Intrinsic::aarch64_sve_scvtf_f16i32:
+  case Intrinsic::aarch64_sve_scvtf_f16i64:
+  case Intrinsic::aarch64_sve_scvtf_f32i64:
+  case Intrinsic::aarch64_sve_scvtf_f64i32:
+  case Intrinsic::aarch64_sve_ucvtf:
+  case Intrinsic::aarch64_sve_ucvtf_f16i32:
+  case Intrinsic::aarch64_sve_ucvtf_f16i64:
+  case Intrinsic::aarch64_sve_ucvtf_f32i64:
+  case Intrinsic::aarch64_sve_ucvtf_f64i32:
+    return SVEIntrinsicInfo::defaultMergingUnaryOp();
+
+  case Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2:
+  case Intrinsic::aarch64_sve_fcvtnt_f16f32:
+  case Intrinsic::aarch64_sve_fcvtnt_f32f64:
+  case Intrinsic::aarch64_sve_fcvtxnt_f32f64:
+    return SVEIntrinsicInfo()
+        .setGoverningPredicateOperandIdx(1)
+        .setOperandIdxInactiveLanesTakenFrom(0);
+
+  case Intrinsic::aarch64_sve_fabd:
+    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_fabd_u);
+  case Intrinsic::aarch64_sve_fadd:
+    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_fadd_u);
+  case Intrinsic::aarch64_sve_fdiv:
+    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_fdiv_u);
+  case Intrinsic::aarch64_sve_fmax:
+    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_fmax_u);
+  case Intrinsic::aarch64_sve_fmaxnm:
+    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_fmaxnm_u);
+  case Intrinsic::aarch64_sve_fmin:
+    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_fmin_u);
+  case Intrinsic::aarch64_sve_fminnm:
+    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_fminnm_u);
+  case Intrinsic::aarch64_sve_fmla:
+    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_fmla_u);
+  case Intrinsic::aarch64_sve_fmls:
+    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_fmls_u);
+  case Intrinsic::aarch64_sve_fmul:
+    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_fmul_u);
+  case Intrinsic::aarch64_sve_fmulx:
+    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_fmulx_u);
+  case Intrinsic::aarch64_sve_fnmla:
+    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_fnmla_u);
+  case Intrinsic::aarch64_sve_fnmls:
+    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_fnmls_u);
+  case Intrinsic::aarch64_sve_fsub:
+    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_fsub_u);
+  case Intrinsic::aarch64_sve_add:
+    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_add_u);
+  case Intrinsic::aarch64_sve_mla:
+    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_mla_u);
+  case Intrinsic::aarch64_sve_mls:
+    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_mls_u);
+  case Intrinsic::aarch64_sve_mul:
+    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_mul_u);
+  case Intrinsic::aarch64_sve_sabd:
+    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_sabd_u);
+  case Intrinsic::aarch64_sve_smax:
+    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_smax_u);
+  case Intrinsic::aarch64_sve_smin:
+    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_smin_u);
+  case Intrinsic::aarch64_sve_smulh:
+    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_smulh_u);
+  case Intrinsic::aarch64_sve_sub:
+    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_sub_u);
+  case Intrinsic::aarch64_sve_uabd:
+    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_uabd_u);
+  case Intrinsic::aarch64_sve_umax:
+    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_umax_u);
+  case Intrinsic::aarch64_sve_umin:
+    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_umin_u);
+  case Intrinsic::aarch64_sve_umulh:
+    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_umulh_u);
+  case Intrinsic::aarch64_sve_asr:
+    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_asr_u);
+  case Intrinsic::aarch64_sve_lsl:
+    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_lsl_u);
+  case Intrinsic::aarch64_sve_lsr:
+    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_lsr_u);
+  case Intrinsic::aarch64_sve_and:
+    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_and_u);
+  case Intrinsic::aarch64_sve_bic:
+    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_bic_u);
+  case Intrinsic::aarch64_sve_eor:
+    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_eor_u);
+  case Intrinsic::aarch64_sve_orr:
+    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_orr_u);
+  case Intrinsic::aarch64_sve_sqsub:
+    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_sqsub_u);
+  case Intrinsic::aarch64_sve_uqsub:
+    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_uqsub_u);
----------------
kmclaughlin-arm wrote:

If the undef intrinsic names always end in "_u", could we do something like this here?
```suggestion
  ...
  case Intrinsic::aarch64_sve_sqsub:
  case Intrinsic::aarch64_sve_uqsub: {
    Intrinsic::ID UndefID =
      Intrinsic::lookupIntrinsicID(Intrinsic::getBaseName(IID).str() + ".u");
    assert(UndefID != Intrinsic::not_intrinsic && "Expected Intrinsic ID");
    return SVEIntrinsicInfo::defaultMergingOp(UndefID);
  }
```

https://github.com/llvm/llvm-project/pull/126928


More information about the llvm-commits mailing list