[llvm] [LLVM][InstCombine][AArch64] Refactor common SVE intrinsic combines. (PR #126928)

via llvm-commits llvm-commits at lists.llvm.org
Wed Feb 12 07:40:03 PST 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-aarch64

Author: Paul Walker (paulwalker-arm)

<details>
<summary>Changes</summary>

Introduce SVEIntrinsicInfo to store properties common across SVE intrinsics. This allows a seperation between intrinsic IDs and the transformations that can be applied to them, which reduces the layering problems we hit when adding new combines.

This PR is mostly refactoring to bring in the concept and port the most common combines (e.g. dead code when all false). This will be followed up with new combines where I plan to reuse much of the existing instruction simplifcation logic to significantly improve our ability to constant fold SVE intrinsics.

---

Patch is 42.97 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/126928.diff


1 Files Affected:

- (modified) llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp (+516-364) 


``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index bd0d55f571234..dbc2f20b902ae 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -994,6 +994,513 @@ static std::optional<Instruction *> processPhiNode(InstCombiner &IC,
   return IC.replaceInstUsesWith(II, NPN);
 }
 
+// A collection of properties common to SVE intrinsics that allow for combines
+// to be written without needing to know the specific intrinsic.
+struct SVEIntrinsicInfo {
+  //
+  // Helper routines for common intrinsic definitions.
+  //
+
+  // e.g. llvm.aarch64.sve.add pg, op1, op2
+  //        with IID ==> llvm.aarch64.sve.add_u
+  static SVEIntrinsicInfo
+  defaultMergingOp(Intrinsic::ID IID = Intrinsic::not_intrinsic) {
+    return SVEIntrinsicInfo()
+        .setGoverningPredicateOperandIdx(0)
+        .setOperandIdxInactiveLanesTakenFrom(1)
+        .setMatchingUndefIntrinsic(IID);
+  }
+
+  // e.g. llvm.aarch64.sve.neg inactive, pg, op
+  static SVEIntrinsicInfo defaultMergingUnaryOp() {
+    return SVEIntrinsicInfo()
+        .setGoverningPredicateOperandIdx(1)
+        .setOperandIdxInactiveLanesTakenFrom(0)
+        .setOperandIdxWithNoActiveLanes(0);
+  }
+
+  // e.g. llvm.aarch64.sve.add_u pg, op1, op2
+  static SVEIntrinsicInfo defaultUndefOp() {
+    return SVEIntrinsicInfo()
+        .setGoverningPredicateOperandIdx(0)
+        .setInactiveLanesAreNotDefined();
+  }
+
+  // e.g. llvm.aarch64.sve.prf pg, ptr        (GPIndex = 0)
+  //      llvm.aarch64.sve.st1 data, pg, ptr  (GPIndex = 1)
+  static SVEIntrinsicInfo defaultVoidOp(unsigned GPIndex) {
+    return SVEIntrinsicInfo()
+        .setGoverningPredicateOperandIdx(GPIndex)
+        .setInactiveLanesAreUnused();
+  }
+
+  // e.g. llvm.aarch64.sve.cmpeq pg, op1, op2
+  //      llvm.aarch64.sve.ld1 pg, ptr
+  static SVEIntrinsicInfo defaultZeroingOp() {
+    return SVEIntrinsicInfo()
+        .setGoverningPredicateOperandIdx(0)
+        .setInactiveLanesAreUnused()
+        .setResultIsZeroInitialized();
+  }
+
+  // All properties relate to predication and thus having a general predicate
+  // is the minimum requirement to say there is intrinsic info to act on.
+  explicit operator bool() const { return hasGoverningPredicate(); }
+
+  //
+  // Properties relating to the governing predicate.
+  //
+
+  bool hasGoverningPredicate() const {
+    return GoverningPredicateIdx != std::numeric_limits<unsigned>::max();
+  }
+
+  unsigned getGoverningPredicateOperandIdx() const {
+    assert(hasGoverningPredicate() && "Propery not set!");
+    return GoverningPredicateIdx;
+  }
+
+  SVEIntrinsicInfo &setGoverningPredicateOperandIdx(unsigned Index) {
+    assert(!hasGoverningPredicate() && "Cannot set property twice!");
+    GoverningPredicateIdx = Index;
+    return *this;
+  }
+
+  //
+  // Properties relating to operations the intrinsic could be transformed into.
+  // NOTE: This does not mean such a transformation is always possible, but the
+  // knowledge makes it possible to reuse existing optimisations without needing
+  // to embed specific handling for each intrinsic. For example, instruction
+  // simplification can be used to optimise an intrinsic's active lanes.
+  //
+
+  bool hasMatchingUndefIntrinsic() const {
+    return UndefIntrinsic != Intrinsic::not_intrinsic;
+  }
+
+  Intrinsic::ID getMatchingUndefIntrinsic() const {
+    assert(hasMatchingUndefIntrinsic() && "Propery not set!");
+    return UndefIntrinsic;
+  }
+
+  SVEIntrinsicInfo &setMatchingUndefIntrinsic(Intrinsic::ID IID) {
+    assert(!hasMatchingUndefIntrinsic() && "Cannot set property twice!");
+    UndefIntrinsic = IID;
+    return *this;
+  }
+
+  //
+  // Properties relating to the result of inactive lanes.
+  //
+
+  bool inactiveLanesTakenFromOperand() const {
+    return ResultLanes == InactiveLanesTakenFromOperand;
+  }
+
+  unsigned getOperandIdxInactiveLanesTakenFrom() const {
+    assert(inactiveLanesTakenFromOperand() && "Propery not set!");
+    return OperandIdxForInactiveLanes;
+  }
+
+  SVEIntrinsicInfo &setOperandIdxInactiveLanesTakenFrom(unsigned Index) {
+    assert(ResultLanes == Uninitialized && "Cannot set property twice!");
+    ResultLanes = InactiveLanesTakenFromOperand;
+    OperandIdxForInactiveLanes = Index;
+    return *this;
+  }
+
+  bool inactiveLanesAreNotDefined() const {
+    return ResultLanes == InactiveLanesAreNotDefined;
+  }
+
+  SVEIntrinsicInfo &setInactiveLanesAreNotDefined() {
+    assert(ResultLanes == Uninitialized && "Cannot set property twice!");
+    ResultLanes = InactiveLanesAreNotDefined;
+    return *this;
+  }
+
+  bool inactiveLanesAreUnused() const {
+    return ResultLanes == InactiveLanesAreUnused;
+  }
+
+  SVEIntrinsicInfo &setInactiveLanesAreUnused() {
+    assert(ResultLanes == Uninitialized && "Cannot set property twice!");
+    ResultLanes = InactiveLanesAreUnused;
+    return *this;
+  }
+
+  // NOTE: Whilst not limited to only inactive lanes, the common use case is:
+  // inactiveLanesAreZerod =
+  //     resultIsZeroInitialized() && inactiveLanesAreUnused()
+  bool resultIsZeroInitialized() const { return ResultIsZeroInitialized; }
+
+  SVEIntrinsicInfo &setResultIsZeroInitialized() {
+    ResultIsZeroInitialized = true;
+    return *this;
+  }
+
+  //
+  // The first operand of unary merging operations is typically only used to
+  // set the result for inactive lanes. Knowing this allows us to deadcode the
+  // operand when we can prove there are no inactive lanes.
+  //
+
+  bool hasOperandWithNoActiveLanes() const {
+    return OperandIdxWithNoActiveLanes != std::numeric_limits<unsigned>::max();
+  }
+
+  unsigned getOperandIdxWithNoActiveLanes() const {
+    assert(hasOperandWithNoActiveLanes() && "Propery not set!");
+    return OperandIdxWithNoActiveLanes;
+  }
+
+  SVEIntrinsicInfo &setOperandIdxWithNoActiveLanes(unsigned Index) {
+    assert(!hasOperandWithNoActiveLanes() && "Cannot set property twice!");
+    OperandIdxWithNoActiveLanes = Index;
+    return *this;
+  }
+
+private:
+  unsigned GoverningPredicateIdx = std::numeric_limits<unsigned>::max();
+
+  Intrinsic::ID UndefIntrinsic = Intrinsic::not_intrinsic;
+
+  enum PredicationStyle {
+    Uninitialized,
+    InactiveLanesTakenFromOperand,
+    InactiveLanesAreNotDefined,
+    InactiveLanesAreUnused
+  } ResultLanes = Uninitialized;
+
+  bool ResultIsZeroInitialized = false;
+  unsigned OperandIdxForInactiveLanes = std::numeric_limits<unsigned>::max();
+  unsigned OperandIdxWithNoActiveLanes = std::numeric_limits<unsigned>::max();
+};
+
+static SVEIntrinsicInfo constructSVEIntrinsicInfo(IntrinsicInst &II) {
+  // Some SVE intrinsics do not use scalable vector types, but since they are
+  // not relevant from an SVEIntrinsicInfo perspective, they are also ignored.
+  if (!isa<ScalableVectorType>(II.getType()) &&
+      all_of(II.args(), [&](const Value *V) {
+        return !isa<ScalableVectorType>(V->getType());
+      }))
+    return SVEIntrinsicInfo();
+
+  Intrinsic::ID IID = II.getIntrinsicID();
+  switch (IID) {
+  default:
+    break;
+  case Intrinsic::aarch64_sve_fcvt_bf16f32_v2:
+  case Intrinsic::aarch64_sve_fcvt_f16f32:
+  case Intrinsic::aarch64_sve_fcvt_f16f64:
+  case Intrinsic::aarch64_sve_fcvt_f32f16:
+  case Intrinsic::aarch64_sve_fcvt_f32f64:
+  case Intrinsic::aarch64_sve_fcvt_f64f16:
+  case Intrinsic::aarch64_sve_fcvt_f64f32:
+  case Intrinsic::aarch64_sve_fcvtlt_f32f16:
+  case Intrinsic::aarch64_sve_fcvtlt_f64f32:
+  case Intrinsic::aarch64_sve_fcvtx_f32f64:
+  case Intrinsic::aarch64_sve_fcvtzs:
+  case Intrinsic::aarch64_sve_fcvtzs_i32f16:
+  case Intrinsic::aarch64_sve_fcvtzs_i32f64:
+  case Intrinsic::aarch64_sve_fcvtzs_i64f16:
+  case Intrinsic::aarch64_sve_fcvtzs_i64f32:
+  case Intrinsic::aarch64_sve_fcvtzu:
+  case Intrinsic::aarch64_sve_fcvtzu_i32f16:
+  case Intrinsic::aarch64_sve_fcvtzu_i32f64:
+  case Intrinsic::aarch64_sve_fcvtzu_i64f16:
+  case Intrinsic::aarch64_sve_fcvtzu_i64f32:
+  case Intrinsic::aarch64_sve_scvtf:
+  case Intrinsic::aarch64_sve_scvtf_f16i32:
+  case Intrinsic::aarch64_sve_scvtf_f16i64:
+  case Intrinsic::aarch64_sve_scvtf_f32i64:
+  case Intrinsic::aarch64_sve_scvtf_f64i32:
+  case Intrinsic::aarch64_sve_ucvtf:
+  case Intrinsic::aarch64_sve_ucvtf_f16i32:
+  case Intrinsic::aarch64_sve_ucvtf_f16i64:
+  case Intrinsic::aarch64_sve_ucvtf_f32i64:
+  case Intrinsic::aarch64_sve_ucvtf_f64i32:
+    return SVEIntrinsicInfo::defaultMergingUnaryOp();
+
+  case Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2:
+  case Intrinsic::aarch64_sve_fcvtnt_f16f32:
+  case Intrinsic::aarch64_sve_fcvtnt_f32f64:
+  case Intrinsic::aarch64_sve_fcvtxnt_f32f64:
+    return SVEIntrinsicInfo()
+        .setGoverningPredicateOperandIdx(1)
+        .setOperandIdxInactiveLanesTakenFrom(0);
+
+  case Intrinsic::aarch64_sve_fabd:
+    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_fabd_u);
+  case Intrinsic::aarch64_sve_fadd:
+    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_fadd_u);
+  case Intrinsic::aarch64_sve_fdiv:
+    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_fdiv_u);
+  case Intrinsic::aarch64_sve_fmax:
+    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_fmax_u);
+  case Intrinsic::aarch64_sve_fmaxnm:
+    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_fmaxnm_u);
+  case Intrinsic::aarch64_sve_fmin:
+    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_fmin_u);
+  case Intrinsic::aarch64_sve_fminnm:
+    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_fminnm_u);
+  case Intrinsic::aarch64_sve_fmla:
+    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_fmla_u);
+  case Intrinsic::aarch64_sve_fmls:
+    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_fmls_u);
+  case Intrinsic::aarch64_sve_fmul:
+    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_fmul_u);
+  case Intrinsic::aarch64_sve_fmulx:
+    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_fmulx_u);
+  case Intrinsic::aarch64_sve_fnmla:
+    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_fnmla_u);
+  case Intrinsic::aarch64_sve_fnmls:
+    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_fnmls_u);
+  case Intrinsic::aarch64_sve_fsub:
+    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_fsub_u);
+  case Intrinsic::aarch64_sve_add:
+    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_add_u);
+  case Intrinsic::aarch64_sve_mla:
+    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_mla_u);
+  case Intrinsic::aarch64_sve_mls:
+    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_mls_u);
+  case Intrinsic::aarch64_sve_mul:
+    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_mul_u);
+  case Intrinsic::aarch64_sve_sabd:
+    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_sabd_u);
+  case Intrinsic::aarch64_sve_smax:
+    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_smax_u);
+  case Intrinsic::aarch64_sve_smin:
+    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_smin_u);
+  case Intrinsic::aarch64_sve_smulh:
+    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_smulh_u);
+  case Intrinsic::aarch64_sve_sub:
+    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_sub_u);
+  case Intrinsic::aarch64_sve_uabd:
+    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_uabd_u);
+  case Intrinsic::aarch64_sve_umax:
+    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_umax_u);
+  case Intrinsic::aarch64_sve_umin:
+    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_umin_u);
+  case Intrinsic::aarch64_sve_umulh:
+    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_umulh_u);
+  case Intrinsic::aarch64_sve_asr:
+    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_asr_u);
+  case Intrinsic::aarch64_sve_lsl:
+    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_lsl_u);
+  case Intrinsic::aarch64_sve_lsr:
+    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_lsr_u);
+  case Intrinsic::aarch64_sve_and:
+    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_and_u);
+  case Intrinsic::aarch64_sve_bic:
+    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_bic_u);
+  case Intrinsic::aarch64_sve_eor:
+    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_eor_u);
+  case Intrinsic::aarch64_sve_orr:
+    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_orr_u);
+  case Intrinsic::aarch64_sve_sqsub:
+    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_sqsub_u);
+  case Intrinsic::aarch64_sve_uqsub:
+    return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_uqsub_u);
+
+  case Intrinsic::aarch64_sve_addqv:
+  case Intrinsic::aarch64_sve_and_z:
+  case Intrinsic::aarch64_sve_bic_z:
+  case Intrinsic::aarch64_sve_brka_z:
+  case Intrinsic::aarch64_sve_brkb_z:
+  case Intrinsic::aarch64_sve_brkn_z:
+  case Intrinsic::aarch64_sve_brkpa_z:
+  case Intrinsic::aarch64_sve_brkpb_z:
+  case Intrinsic::aarch64_sve_cntp:
+  case Intrinsic::aarch64_sve_compact:
+  case Intrinsic::aarch64_sve_eor_z:
+  case Intrinsic::aarch64_sve_eorv:
+  case Intrinsic::aarch64_sve_eorqv:
+  case Intrinsic::aarch64_sve_nand_z:
+  case Intrinsic::aarch64_sve_nor_z:
+  case Intrinsic::aarch64_sve_orn_z:
+  case Intrinsic::aarch64_sve_orr_z:
+  case Intrinsic::aarch64_sve_orv:
+  case Intrinsic::aarch64_sve_orqv:
+  case Intrinsic::aarch64_sve_pnext:
+  case Intrinsic::aarch64_sve_rdffr_z:
+  case Intrinsic::aarch64_sve_saddv:
+  case Intrinsic::aarch64_sve_uaddv:
+  case Intrinsic::aarch64_sve_umaxv:
+  case Intrinsic::aarch64_sve_umaxqv:
+  case Intrinsic::aarch64_sve_cmpeq:
+  case Intrinsic::aarch64_sve_cmpeq_wide:
+  case Intrinsic::aarch64_sve_cmpge:
+  case Intrinsic::aarch64_sve_cmpge_wide:
+  case Intrinsic::aarch64_sve_cmpgt:
+  case Intrinsic::aarch64_sve_cmpgt_wide:
+  case Intrinsic::aarch64_sve_cmphi:
+  case Intrinsic::aarch64_sve_cmphi_wide:
+  case Intrinsic::aarch64_sve_cmphs:
+  case Intrinsic::aarch64_sve_cmphs_wide:
+  case Intrinsic::aarch64_sve_cmple_wide:
+  case Intrinsic::aarch64_sve_cmplo_wide:
+  case Intrinsic::aarch64_sve_cmpls_wide:
+  case Intrinsic::aarch64_sve_cmplt_wide:
+  case Intrinsic::aarch64_sve_cmpne:
+  case Intrinsic::aarch64_sve_cmpne_wide:
+  case Intrinsic::aarch64_sve_facge:
+  case Intrinsic::aarch64_sve_facgt:
+  case Intrinsic::aarch64_sve_fcmpeq:
+  case Intrinsic::aarch64_sve_fcmpge:
+  case Intrinsic::aarch64_sve_fcmpgt:
+  case Intrinsic::aarch64_sve_fcmpne:
+  case Intrinsic::aarch64_sve_fcmpuo:
+  case Intrinsic::aarch64_sve_ld1:
+  case Intrinsic::aarch64_sve_ld1_gather:
+  case Intrinsic::aarch64_sve_ld1_gather_index:
+  case Intrinsic::aarch64_sve_ld1_gather_scalar_offset:
+  case Intrinsic::aarch64_sve_ld1_gather_sxtw:
+  case Intrinsic::aarch64_sve_ld1_gather_sxtw_index:
+  case Intrinsic::aarch64_sve_ld1_gather_uxtw:
+  case Intrinsic::aarch64_sve_ld1_gather_uxtw_index:
+  case Intrinsic::aarch64_sve_ld1q_gather_index:
+  case Intrinsic::aarch64_sve_ld1q_gather_scalar_offset:
+  case Intrinsic::aarch64_sve_ld1q_gather_vector_offset:
+  case Intrinsic::aarch64_sve_ld1ro:
+  case Intrinsic::aarch64_sve_ld1rq:
+  case Intrinsic::aarch64_sve_ld1udq:
+  case Intrinsic::aarch64_sve_ld1uwq:
+  case Intrinsic::aarch64_sve_ld2_sret:
+  case Intrinsic::aarch64_sve_ld2q_sret:
+  case Intrinsic::aarch64_sve_ld3_sret:
+  case Intrinsic::aarch64_sve_ld3q_sret:
+  case Intrinsic::aarch64_sve_ld4_sret:
+  case Intrinsic::aarch64_sve_ld4q_sret:
+  case Intrinsic::aarch64_sve_ldff1:
+  case Intrinsic::aarch64_sve_ldff1_gather:
+  case Intrinsic::aarch64_sve_ldff1_gather_index:
+  case Intrinsic::aarch64_sve_ldff1_gather_scalar_offset:
+  case Intrinsic::aarch64_sve_ldff1_gather_sxtw:
+  case Intrinsic::aarch64_sve_ldff1_gather_sxtw_index:
+  case Intrinsic::aarch64_sve_ldff1_gather_uxtw:
+  case Intrinsic::aarch64_sve_ldff1_gather_uxtw_index:
+  case Intrinsic::aarch64_sve_ldnf1:
+  case Intrinsic::aarch64_sve_ldnt1:
+  case Intrinsic::aarch64_sve_ldnt1_gather:
+  case Intrinsic::aarch64_sve_ldnt1_gather_index:
+  case Intrinsic::aarch64_sve_ldnt1_gather_scalar_offset:
+  case Intrinsic::aarch64_sve_ldnt1_gather_uxtw:
+    return SVEIntrinsicInfo::defaultZeroingOp();
+
+  case Intrinsic::aarch64_sve_prf:
+  case Intrinsic::aarch64_sve_prfb_gather_index:
+  case Intrinsic::aarch64_sve_prfb_gather_scalar_offset:
+  case Intrinsic::aarch64_sve_prfb_gather_sxtw_index:
+  case Intrinsic::aarch64_sve_prfb_gather_uxtw_index:
+  case Intrinsic::aarch64_sve_prfd_gather_index:
+  case Intrinsic::aarch64_sve_prfd_gather_scalar_offset:
+  case Intrinsic::aarch64_sve_prfd_gather_sxtw_index:
+  case Intrinsic::aarch64_sve_prfd_gather_uxtw_index:
+  case Intrinsic::aarch64_sve_prfh_gather_index:
+  case Intrinsic::aarch64_sve_prfh_gather_scalar_offset:
+  case Intrinsic::aarch64_sve_prfh_gather_sxtw_index:
+  case Intrinsic::aarch64_sve_prfh_gather_uxtw_index:
+  case Intrinsic::aarch64_sve_prfw_gather_index:
+  case Intrinsic::aarch64_sve_prfw_gather_scalar_offset:
+  case Intrinsic::aarch64_sve_prfw_gather_sxtw_index:
+  case Intrinsic::aarch64_sve_prfw_gather_uxtw_index:
+    return SVEIntrinsicInfo::defaultVoidOp(0);
+
+  case Intrinsic::aarch64_sve_st1_scatter:
+  case Intrinsic::aarch64_sve_st1_scatter_scalar_offset:
+  case Intrinsic::aarch64_sve_st1_scatter_sxtw:
+  case Intrinsic::aarch64_sve_st1_scatter_sxtw_index:
+  case Intrinsic::aarch64_sve_st1_scatter_uxtw:
+  case Intrinsic::aarch64_sve_st1_scatter_uxtw_index:
+  case Intrinsic::aarch64_sve_st1dq:
+  case Intrinsic::aarch64_sve_st1q_scatter_index:
+  case Intrinsic::aarch64_sve_st1q_scatter_scalar_offset:
+  case Intrinsic::aarch64_sve_st1q_scatter_vector_offset:
+  case Intrinsic::aarch64_sve_st1wq:
+  case Intrinsic::aarch64_sve_stnt1:
+  case Intrinsic::aarch64_sve_stnt1_scatter:
+  case Intrinsic::aarch64_sve_stnt1_scatter_index:
+  case Intrinsic::aarch64_sve_stnt1_scatter_scalar_offset:
+  case Intrinsic::aarch64_sve_stnt1_scatter_uxtw:
+    return SVEIntrinsicInfo::defaultVoidOp(1);
+  case Intrinsic::aarch64_sve_st2:
+  case Intrinsic::aarch64_sve_st2q:
+    return SVEIntrinsicInfo::defaultVoidOp(2);
+  case Intrinsic::aarch64_sve_st3:
+  case Intrinsic::aarch64_sve_st3q:
+    return SVEIntrinsicInfo::defaultVoidOp(3);
+  case Intrinsic::aarch64_sve_st4:
+  case Intrinsic::aarch64_sve_st4q:
+    return SVEIntrinsicInfo::defaultVoidOp(4);
+  }
+
+  return SVEIntrinsicInfo();
+}
+
+static bool isAllActivePredicate(Value *Pred) {
+  // Look through convert.from.svbool(convert.to.svbool(...) chain.
+  Value *UncastedPred;
+  if (match(Pred, m_Intrinsic<Intrinsic::aarch64_sve_convert_from_svbool>(
+                      m_Intrinsic<Intrinsic::aarch64_sve_convert_to_svbool>(
+                          m_Value(UncastedPred)))))
+    // If the predicate has the same or less lanes than the uncasted
+    // predicate then we know the casting has no effect.
+    if (cast<ScalableVectorType>(Pred->getType())->getMinNumElements() <=
+        cast<ScalableVectorType>(UncastedPred->getType())->getMinNumElements())
+      Pred = UncastedPred;
+
+  return match(Pred, m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(
+                         m_ConstantInt<AArch64SVEPredPattern::all>()));
+}
+
+// Use SVE intrinsic info to eliminate redundant operands and/or canonicalise
+// to operations with less strict inactive lane requirements.
+static std::optional<Instruction *>
+simplifySVEIntrinsic(InstCombiner &IC, IntrinsicInst &II,
+                     const SVEIntrinsicInfo &IInfo) {
+  if (!IInfo.hasGoverningPredicate())
+    return std::nullopt;
+
+  auto *OpPredicate = II.getOperand(IInfo.getGoverningPredicateOperandIdx());
+
+  // If there are no active lanes.
+  if (match(OpPredicate, m_ZeroInt())) {
+    if (IInfo.inactiveLanesTakenFromOperand())
+      return IC.replaceInstUsesWith(
+          II, II.getOperand(IInfo.getOperandIdxInactiveLanesTakenFrom()));
+
+    if (IInfo.inactiveLanesAreUnused()) {
+      if (IInfo.resultIsZeroInitialized()) {
+        IC.replaceInstUsesWith(II, Constant::getNul...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/126928


More information about the llvm-commits mailing list