[llvm] [AArch64][SVE] optimisation for unary SVE intrinsics with no active lanes (PR #86651)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 26 04:12:09 PDT 2024
llvmbot wrote:
@llvm/pr-subscribers-llvm-transforms
@llvm/pr-subscribers-backend-aarch64
Author: None (Lukacma)
Changes:
This patch extends #73964 and adds optimisation of unary SVE intrinsics whose governing predicate has no active lanes.
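For illustration, here is a minimal IR sketch of the basic fold, adapted from the tests added by this patch (the function and value names here are made up): when the governing predicate of a merging unary intrinsic is all false, no lanes of the operation are computed and the call folds to its passthru vector.

```llvm
; Sketch only: with a zeroinitializer predicate (no active lanes), the merging
; form of svabs returns its first (inactive/passthru) operand unchanged.
declare <vscale x 16 x i8> @llvm.aarch64.sve.abs.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, <vscale x 16 x i8>)

define <vscale x 16 x i8> @abs_no_active_lanes(<vscale x 16 x i8> %inactive, <vscale x 16 x i8> %op) {
  %r = tail call <vscale x 16 x i8> @llvm.aarch64.sve.abs.nxv16i8(<vscale x 16 x i8> %inactive, <vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %op)
  ; With this patch, instcombine folds the call so this simply returns %inactive.
  ret <vscale x 16 x i8> %r
}
```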
---
Patch is 169.09 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/86651.diff
2 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp (+369)
- (modified) llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-m-forms-no-active-lanes.ll (+3291-1)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index af0b6873d170dd..40bd17053d34cd 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -923,6 +923,80 @@ static bool isAllActivePredicate(Value *Pred) {
m_ConstantInt<AArch64SVEPredPattern::all>()));
}
+// Simplify a unary operation whose predicate has no active lanes by replacing
+// the instruction with one of its operands.
+static std::optional<Instruction *>
+instCombineSVENoActiveUnaryReplace(InstCombiner &IC, IntrinsicInst &II,
+                                   bool HasInactiveVector) {
+  int PredOperand = HasInactiveVector ? 1 : 0;
+  int ReplaceOperand = HasInactiveVector ? 0 : 1;
+  if (match(II.getOperand(PredOperand), m_ZeroInt())) {
+    IC.replaceInstUsesWith(II, II.getOperand(ReplaceOperand));
+    return IC.eraseInstFromFunction(II);
+  }
+  return std::nullopt;
+}
+
+// Simplify a unary operation whose predicate has no active lanes by replacing
+// the instruction with a zero constant.
+static std::optional<Instruction *>
+instCombineSVENoActiveUnaryZero(InstCombiner &IC, IntrinsicInst &II) {
+  if (match(II.getOperand(0), m_ZeroInt())) {
+    Constant *Node;
+    Type *RetTy = II.getType();
+    if (RetTy->isStructTy()) {
+      auto *StructT = cast<StructType>(RetTy);
+      Type *VecT = StructT->getElementType(0);
+      SmallVector<Constant *, 4> ZeroVec;
+      for (unsigned I = 0; I < StructT->getNumElements(); I++)
+        ZeroVec.push_back(VecT->isFPOrFPVectorTy() ? ConstantFP::get(VecT, 0.0)
+                                                   : ConstantInt::get(VecT, 0));
+      Node = ConstantStruct::get(StructT, ZeroVec);
+    } else if (RetTy->isFPOrFPVectorTy())
+      Node = ConstantFP::get(RetTy, 0.0);
+    else
+      Node = ConstantInt::get(RetTy, 0);
+
+    IC.replaceInstUsesWith(II, Node);
+    return IC.eraseInstFromFunction(II);
+  }
+  return std::nullopt;
+}
+
+// Erase a unary operation whose predicate has no active lanes.
+static std::optional<Instruction *>
+instCombineSVENoActiveUnaryErase(InstCombiner &IC, IntrinsicInst &II,
+                                 int PredPos) {
+  if (match(II.getOperand(PredPos), m_ZeroInt()))
+    return IC.eraseInstFromFunction(II);
+  return std::nullopt;
+}
+
+// Simplify a unary operation whose predicate has no active lanes by replacing
+// the instruction with the given constant.
+static std::optional<Instruction *>
+instCombineSVENoActiveUnaryConstant(InstCombiner &IC, IntrinsicInst &II,
+                                    Constant *NewVal) {
+  if (match(II.getOperand(0), m_ZeroInt())) {
+    IC.replaceInstUsesWith(II, NewVal);
+    return IC.eraseInstFromFunction(II);
+  }
+  return std::nullopt;
+}
+
+// Simplify a unary operation whose predicate has no active lanes, or replace
+// the inactive vector with undef (the _x form) when all lanes are active.
+static std::optional<Instruction *>
+instCombineSVEAllOrNoActiveUnary(InstCombiner &IC, IntrinsicInst &II) {
+  if (isAllActivePredicate(II.getOperand(1)) &&
+      !isa<UndefValue>(II.getOperand(0))) {
+    Value *Undef = UndefValue::get(II.getType());
+    return IC.replaceOperand(II, 0, Undef);
+  }
+  return instCombineSVENoActiveUnaryReplace(IC, II, true);
+}
+
static std::optional<Instruction *> instCombineSVESel(InstCombiner &IC,
IntrinsicInst &II) {
// svsel(ptrue, x, y) => x
@@ -937,6 +1011,12 @@ static std::optional<Instruction *> instCombineSVESel(InstCombiner &IC,
static std::optional<Instruction *> instCombineSVEDup(InstCombiner &IC,
IntrinsicInst &II) {
+
+  // Optimize when predicate is known all active or all inactive
+  if (auto II_NA = instCombineSVEAllOrNoActiveUnary(IC, II))
+    return II_NA;
+
IntrinsicInst *Pg = dyn_cast<IntrinsicInst>(II.getArgOperand(1));
if (!Pg)
return std::nullopt;
@@ -971,6 +1051,12 @@ static std::optional<Instruction *> instCombineSVEDupX(InstCombiner &IC,
static std::optional<Instruction *> instCombineSVECmpNE(InstCombiner &IC,
IntrinsicInst &II) {
+
+  // Replace with a zero constant when all lanes are inactive.
+  if (auto II_NA = instCombineSVENoActiveUnaryZero(IC, II))
+    return II_NA;
+
LLVMContext &Ctx = II.getContext();
// Check that the predicate is all active
@@ -1156,6 +1242,11 @@ static std::optional<Instruction *> instCombineSVECondLast(InstCombiner &IC,
Value *Vec = II.getArgOperand(2);
Type *Ty = II.getType();
+  // If all lanes are inactive, replace with the fallback operand.
+  if (auto II_NA = instCombineSVENoActiveUnaryReplace(IC, II, false))
+    return II_NA;
+
if (!Ty->isIntegerTy())
return std::nullopt;
@@ -1336,6 +1427,11 @@ instCombineSVELD1(InstCombiner &IC, IntrinsicInst &II, const DataLayout &DL) {
Value *PtrOp = II.getOperand(1);
Type *VecTy = II.getType();
+  // Replace with a zero constant when all lanes are inactive.
+  if (auto II_NA = instCombineSVENoActiveUnaryZero(IC, II))
+    return II_NA;
+
if (isAllActivePredicate(Pred)) {
LoadInst *Load = IC.Builder.CreateLoad(VecTy, PtrOp);
Load->copyMetadata(II);
@@ -1355,6 +1451,11 @@ instCombineSVEST1(InstCombiner &IC, IntrinsicInst &II, const DataLayout &DL) {
Value *Pred = II.getOperand(1);
Value *PtrOp = II.getOperand(2);
+  // Remove the store when all lanes are inactive (the predicate is operand 1).
+  if (auto II_NA = instCombineSVENoActiveUnaryErase(IC, II, 1))
+    return II_NA;
+
if (isAllActivePredicate(Pred)) {
StoreInst *Store = IC.Builder.CreateStore(VecOp, PtrOp);
Store->copyMetadata(II);
@@ -1653,6 +1754,11 @@ instCombineLD1GatherIndex(InstCombiner &IC, IntrinsicInst &II) {
Type *Ty = II.getType();
Value *PassThru = ConstantAggregateZero::get(Ty);
+  // Replace with a zero constant when all lanes are inactive.
+  if (auto II_NA = instCombineSVENoActiveUnaryZero(IC, II))
+    return II_NA;
+
// Contiguous gather => masked load.
// (sve.ld1.gather.index Mask BasePtr (sve.index IndexBase 1))
// => (masked.load (gep BasePtr IndexBase) Align Mask zeroinitializer)
@@ -1683,6 +1789,11 @@ instCombineST1ScatterIndex(InstCombiner &IC, IntrinsicInst &II) {
Value *Index = II.getOperand(3);
Type *Ty = Val->getType();
+  // Remove the scatter store when all lanes are inactive (the mask is operand 1).
+  if (auto II_NA = instCombineSVENoActiveUnaryErase(IC, II, 1))
+    return II_NA;
+
// Contiguous scatter => masked store.
// (sve.st1.scatter.index Value Mask BasePtr (sve.index IndexBase 1))
// => (masked.store Value (gep BasePtr IndexBase) Align Mask)
@@ -1879,6 +1990,264 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
switch (IID) {
default:
break;
+
+ case Intrinsic::aarch64_sve_abs:
+ case Intrinsic::aarch64_sve_bfcvt_x2:
+ case Intrinsic::aarch64_sve_cls:
+ case Intrinsic::aarch64_sve_clz:
+ case Intrinsic::aarch64_sve_cnot:
+ case Intrinsic::aarch64_sve_cnt:
+ case Intrinsic::aarch64_sve_fabs:
+ case Intrinsic::aarch64_sve_fcvt:
+ case Intrinsic::aarch64_sve_fcvt_x2:
+ case Intrinsic::aarch64_sve_fcvtn_x2:
+ case Intrinsic::aarch64_sve_fcvtzs_x2:
+ case Intrinsic::aarch64_sve_fcvtzs_x4:
+ case Intrinsic::aarch64_sve_fcvtzu_x2:
+ case Intrinsic::aarch64_sve_fcvtzu_x4:
+ case Intrinsic::aarch64_sve_fcvtzs:
+ case Intrinsic::aarch64_sve_fcvtzs_i32f16:
+ case Intrinsic::aarch64_sve_fcvtzs_i64f16:
+ case Intrinsic::aarch64_sve_fcvtzs_i64f32:
+ case Intrinsic::aarch64_sve_fcvt_bf16f32:
+ case Intrinsic::aarch64_sve_fcvtnt_bf16f32:
+ case Intrinsic::aarch64_sve_fcvtzs_i32f64:
+ case Intrinsic::aarch64_sve_fcvtzu:
+ case Intrinsic::aarch64_sve_fcvtzu_i32f16:
+ case Intrinsic::aarch64_sve_fcvtzu_i64f16:
+ case Intrinsic::aarch64_sve_fcvtzu_i64f32:
+ case Intrinsic::aarch64_sve_fcvtzu_i32f64:
+ case Intrinsic::aarch64_sve_fcvt_f16f32:
+ case Intrinsic::aarch64_sve_fcvt_f16f64:
+ case Intrinsic::aarch64_sve_fcvt_f32f16:
+ case Intrinsic::aarch64_sve_fcvt_f32f64:
+ case Intrinsic::aarch64_sve_fcvt_f64f16:
+ case Intrinsic::aarch64_sve_fcvt_f64f32:
+ case Intrinsic::aarch64_sve_fcvtlt_f32f16:
+ case Intrinsic::aarch64_sve_fcvtlt_f64f32:
+ case Intrinsic::aarch64_sve_fcvtx_f32f64:
+ case Intrinsic::aarch64_sve_fcvtnt_f16f32:
+ case Intrinsic::aarch64_sve_fcvtnt_f32f64:
+ case Intrinsic::aarch64_sve_fcvtxnt_f32f64:
+ case Intrinsic::aarch64_sve_flogb:
+ case Intrinsic::aarch64_sve_fmaxp:
+ case Intrinsic::aarch64_sve_fminp:
+ case Intrinsic::aarch64_sve_fneg:
+ case Intrinsic::aarch64_sve_frecpx:
+ case Intrinsic::aarch64_sve_frinta:
+ case Intrinsic::aarch64_sve_frinti:
+ case Intrinsic::aarch64_sve_frintm:
+ case Intrinsic::aarch64_sve_frintn:
+ case Intrinsic::aarch64_sve_frintp:
+ case Intrinsic::aarch64_sve_frintx:
+ case Intrinsic::aarch64_sve_frintz:
+ case Intrinsic::aarch64_sve_fscale:
+ case Intrinsic::aarch64_sve_fsqrt:
+ case Intrinsic::aarch64_sve_neg:
+ case Intrinsic::aarch64_sve_not:
+ case Intrinsic::aarch64_sve_rbit:
+ case Intrinsic::aarch64_sve_revb:
+ case Intrinsic::aarch64_sve_revh:
+ case Intrinsic::aarch64_sve_revw:
+ case Intrinsic::aarch64_sve_revd:
+ case Intrinsic::aarch64_sve_scvtf:
+ case Intrinsic::aarch64_sve_scvtf_f16i32:
+ case Intrinsic::aarch64_sve_scvtf_f16i64:
+ case Intrinsic::aarch64_sve_scvtf_f32i64:
+ case Intrinsic::aarch64_sve_scvtf_f64i32:
+ case Intrinsic::aarch64_sve_scvtf_x2:
+ case Intrinsic::aarch64_sve_scvtf_x4:
+ case Intrinsic::aarch64_sve_ucvtf:
+ case Intrinsic::aarch64_sve_ucvtf_f16i32:
+ case Intrinsic::aarch64_sve_ucvtf_f16i64:
+ case Intrinsic::aarch64_sve_ucvtf_f32i64:
+ case Intrinsic::aarch64_sve_ucvtf_f64i32:
+ case Intrinsic::aarch64_sve_ucvtf_x2:
+ case Intrinsic::aarch64_sve_ucvtf_x4:
+ case Intrinsic::aarch64_sve_sqabs:
+ case Intrinsic::aarch64_sve_sqneg:
+ case Intrinsic::aarch64_sve_sqrshl:
+ case Intrinsic::aarch64_sve_sqshl:
+ case Intrinsic::aarch64_sve_sqshlu:
+ case Intrinsic::aarch64_sve_sxtb:
+ case Intrinsic::aarch64_sve_sxth:
+ case Intrinsic::aarch64_sve_sxtw:
+ case Intrinsic::aarch64_sve_urecpe:
+ case Intrinsic::aarch64_sve_ursqrte:
+ case Intrinsic::aarch64_sve_uxtb:
+ case Intrinsic::aarch64_sve_uxth:
+ case Intrinsic::aarch64_sve_uxtw:
+ return instCombineSVEAllOrNoActiveUnary(IC, II);
+ case Intrinsic::aarch64_sve_brka:
+ case Intrinsic::aarch64_sve_brkb:
+ case Intrinsic::aarch64_sve_sqdecp:
+ case Intrinsic::aarch64_sve_uqdecp:
+ case Intrinsic::aarch64_sve_sqdecp_n32:
+ case Intrinsic::aarch64_sve_sqdecp_n64:
+ return instCombineSVENoActiveUnaryReplace(IC, II, true);
+ case Intrinsic::aarch64_sve_asrd:
+ case Intrinsic::aarch64_sve_clasta:
+ case Intrinsic::aarch64_sve_clastb:
+ case Intrinsic::aarch64_sve_pfirst:
+ return instCombineSVENoActiveUnaryReplace(IC, II, false);
+ case Intrinsic::aarch64_sve_addqv:
+ case Intrinsic::aarch64_sve_brka_z:
+ case Intrinsic::aarch64_sve_brkb_z:
+ case Intrinsic::aarch64_sve_brkn_z:
+ case Intrinsic::aarch64_sve_brkpa_z:
+ case Intrinsic::aarch64_sve_brkpb_z:
+ case Intrinsic::aarch64_sve_cmpeq:
+ case Intrinsic::aarch64_sve_cmpge:
+ case Intrinsic::aarch64_sve_cmpgt:
+ case Intrinsic::aarch64_sve_cmphi:
+ case Intrinsic::aarch64_sve_cmphs:
+ case Intrinsic::aarch64_sve_cmpeq_wide:
+ case Intrinsic::aarch64_sve_cmpge_wide:
+ case Intrinsic::aarch64_sve_cmpgt_wide:
+ case Intrinsic::aarch64_sve_cmphi_wide:
+ case Intrinsic::aarch64_sve_cmphs_wide:
+ case Intrinsic::aarch64_sve_cmple_wide:
+ case Intrinsic::aarch64_sve_cmplt_wide:
+ case Intrinsic::aarch64_sve_cmplo_wide:
+ case Intrinsic::aarch64_sve_cmpls_wide:
+ case Intrinsic::aarch64_sve_cntp:
+ case Intrinsic::aarch64_sve_compact:
+ case Intrinsic::aarch64_sve_eorv:
+ case Intrinsic::aarch64_sve_eorqv:
+ case Intrinsic::aarch64_sve_facge:
+ case Intrinsic::aarch64_sve_facgt:
+ case Intrinsic::aarch64_sve_faddv:
+ case Intrinsic::aarch64_sve_fcmpeq:
+ case Intrinsic::aarch64_sve_fcmpne:
+ case Intrinsic::aarch64_sve_fcmpge:
+ case Intrinsic::aarch64_sve_fcmpgt:
+ case Intrinsic::aarch64_sve_fcmpuo:
+ case Intrinsic::aarch64_sve_ld1_gather_scalar_offset:
+ case Intrinsic::aarch64_sve_ld1_gather:
+ case Intrinsic::aarch64_sve_ld1_gather_sxtw:
+ case Intrinsic::aarch64_sve_ld1_gather_uxtw:
+ case Intrinsic::aarch64_sve_ld1_gather_sxtw_index:
+ case Intrinsic::aarch64_sve_ld1_gather_uxtw_index:
+ case Intrinsic::aarch64_sve_ld1_pn_x2:
+ case Intrinsic::aarch64_sve_ld1_pn_x4:
+ case Intrinsic::aarch64_sve_ld1rq:
+ case Intrinsic::aarch64_sve_ld1ro:
+ case Intrinsic::aarch64_sve_ld1uwq:
+ case Intrinsic::aarch64_sve_ld1udq:
+ case Intrinsic::aarch64_sve_ld1q_gather_scalar_offset:
+ case Intrinsic::aarch64_sve_ld1q_gather_index:
+ case Intrinsic::aarch64_sve_ld1q_gather_vector_offset:
+ case Intrinsic::aarch64_sve_ld2_sret:
+ case Intrinsic::aarch64_sve_ld2q_sret:
+ case Intrinsic::aarch64_sve_ld3_sret:
+ case Intrinsic::aarch64_sve_ld3q_sret:
+ case Intrinsic::aarch64_sve_ld4_sret:
+ case Intrinsic::aarch64_sve_ld4q_sret:
+ case Intrinsic::aarch64_sve_ldff1:
+ case Intrinsic::aarch64_sve_ldff1_gather_scalar_offset:
+ case Intrinsic::aarch64_sve_ldff1_gather:
+ case Intrinsic::aarch64_sve_ldff1_gather_sxtw:
+ case Intrinsic::aarch64_sve_ldff1_gather_uxtw:
+ case Intrinsic::aarch64_sve_ldff1_gather_index:
+ case Intrinsic::aarch64_sve_ldff1_gather_sxtw_index:
+ case Intrinsic::aarch64_sve_ldff1_gather_uxtw_index:
+ case Intrinsic::aarch64_sve_ldnf1:
+ case Intrinsic::aarch64_sve_ldnt1:
+ case Intrinsic::aarch64_sve_ldnt1_gather_scalar_offset:
+ case Intrinsic::aarch64_sve_ldnt1_gather:
+ case Intrinsic::aarch64_sve_ldnt1_gather_uxtw:
+ case Intrinsic::aarch64_sve_ldnt1_gather_index:
+ case Intrinsic::aarch64_sve_orv:
+ case Intrinsic::aarch64_sve_orqv:
+ case Intrinsic::aarch64_sve_rdffr_z:
+ case Intrinsic::aarch64_sve_saddv:
+ case Intrinsic::aarch64_sve_uaddv:
+ case Intrinsic::aarch64_sve_umaxv:
+ case Intrinsic::aarch64_sve_umaxqv:
+ return instCombineSVENoActiveUnaryZero(IC, II);
+ case Intrinsic::aarch64_sve_andqv:
+ case Intrinsic::aarch64_sve_andv:
+    return instCombineSVENoActiveUnaryConstant(
+        IC, II, Constant::getAllOnesValue(II.getType()));
+ case Intrinsic::aarch64_sve_fmaxnmqv:
+ case Intrinsic::aarch64_sve_fmaxnmv:
+ case Intrinsic::aarch64_sve_fminnmqv:
+ case Intrinsic::aarch64_sve_fminnmv:
+ return instCombineSVENoActiveUnaryConstant(IC, II,
+ ConstantFP::getQNaN(II.getType()));
+ case Intrinsic::aarch64_sve_fmaxqv:
+ case Intrinsic::aarch64_sve_fmaxv:
+ return instCombineSVENoActiveUnaryConstant(IC, II,
+ ConstantFP::getInfinity(II.getType(), true));
+ case Intrinsic::aarch64_sve_fminqv:
+ case Intrinsic::aarch64_sve_fminv:
+ return instCombineSVENoActiveUnaryConstant(IC, II,
+ ConstantFP::getInfinity(II.getType()));
+ case Intrinsic::aarch64_sve_prf:
+ case Intrinsic::aarch64_sve_prfb_gather_scalar_offset:
+ case Intrinsic::aarch64_sve_prfb_gather_sxtw_index:
+ case Intrinsic::aarch64_sve_prfb_gather_index:
+ case Intrinsic::aarch64_sve_prfb_gather_uxtw_index:
+ case Intrinsic::aarch64_sve_prfh_gather_scalar_offset:
+ case Intrinsic::aarch64_sve_prfh_gather_sxtw_index:
+ case Intrinsic::aarch64_sve_prfh_gather_index:
+ case Intrinsic::aarch64_sve_prfh_gather_uxtw_index:
+ case Intrinsic::aarch64_sve_prfw_gather_scalar_offset:
+ case Intrinsic::aarch64_sve_prfw_gather_sxtw_index:
+ case Intrinsic::aarch64_sve_prfw_gather_index:
+ case Intrinsic::aarch64_sve_prfw_gather_uxtw_index:
+ case Intrinsic::aarch64_sve_prfd_gather_scalar_offset:
+ case Intrinsic::aarch64_sve_prfd_gather_sxtw_index:
+ case Intrinsic::aarch64_sve_prfd_gather_index:
+ case Intrinsic::aarch64_sve_prfd_gather_uxtw_index:
+ return instCombineSVENoActiveUnaryErase(IC, II, 0);
+ case Intrinsic::aarch64_sve_st1_scatter_scalar_offset:
+ case Intrinsic::aarch64_sve_st1_scatter:
+ case Intrinsic::aarch64_sve_st1_scatter_sxtw:
+ case Intrinsic::aarch64_sve_st1_scatter_uxtw:
+ case Intrinsic::aarch64_sve_st1_scatter_sxtw_index:
+ case Intrinsic::aarch64_sve_st1_scatter_uxtw_index:
+ case Intrinsic::aarch64_sve_st1q_scatter_scalar_offset:
+ case Intrinsic::aarch64_sve_st1q_scatter_vector_offset:
+ case Intrinsic::aarch64_sve_st1q_scatter_index:
+ case Intrinsic::aarch64_sve_st1dq:
+ case Intrinsic::aarch64_sve_st1wq:
+ case Intrinsic::aarch64_sve_stnt1:
+ case Intrinsic::aarch64_sve_stnt1_scatter:
+ case Intrinsic::aarch64_sve_stnt1_scatter_index:
+ case Intrinsic::aarch64_sve_stnt1_scatter_scalar_offset:
+ case Intrinsic::aarch64_sve_stnt1_scatter_uxtw:
+ return instCombineSVENoActiveUnaryErase(IC, II, 1);
+ case Intrinsic::aarch64_sve_st2:
+ case Intrinsic::aarch64_sve_st2q:
+ return instCombineSVENoActiveUnaryErase(IC, II, 2);
+ case Intrinsic::aarch64_sve_st3:
+ case Intrinsic::aarch64_sve_st3q:
+ return instCombineSVENoActiveUnaryErase(IC, II, 3);
+ case Intrinsic::aarch64_sve_st4:
+ case Intrinsic::aarch64_sve_st4q:
+ return instCombineSVENoActiveUnaryErase(IC, II, 4);
+  case Intrinsic::aarch64_sve_smaxv:
+  case Intrinsic::aarch64_sve_smaxqv: {
+    auto *MinSInt = ConstantInt::get(
+        II.getType(),
+        APInt::getSignedMinValue(II.getType()->getScalarSizeInBits()));
+    return instCombineSVENoActiveUnaryConstant(IC, II, MinSInt);
+  }
+  case Intrinsic::aarch64_sve_sminv:
+  case Intrinsic::aarch64_sve_sminqv: {
+    auto *MaxSInt = ConstantInt::get(
+        II.getType(),
+        APInt::getSignedMaxValue(II.getType()->getScalarSizeInBits()));
+    return instCombineSVENoActiveUnaryConstant(IC, II, MaxSInt);
+  }
+  case Intrinsic::aarch64_sve_uminv:
+  case Intrinsic::aarch64_sve_uminqv: {
+    auto *MaxUInt = ConstantInt::get(
+        II.getType(),
+        APInt::getMaxValue(II.getType()->getScalarSizeInBits()));
+    return instCombineSVENoActiveUnaryConstant(IC, II, MaxUInt);
+  }
case Intrinsic::aarch64_neon_fmaxnm:
case Intrinsic::aarch64_neon_fminnm:
return instCombineMaxMinNM(IC, II);
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-m-forms-no-active-lanes.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-m-forms-no-active-lanes.ll
index 463a5f5d2cfb5c..57372c46eecf2f 100644
--- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-m-forms-no-active-lanes.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-m-forms-no-active-lanes.ll
@@ -1321,4 +1321,3294 @@ define <vscale x 2 x i64> @replace_uqsub_intrinsic_i64(<vscale x 2 x i64> %a, <v
ret <vscale x 2 x i64> %1
}
-attributes #0 = { "target-features"="+sve,+sve2" }
+define dso_local <vscale x 16 x i8> @test_svabs_m(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svabs_m(
+; CHECK-SAME: <vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret <vscale x 16 x i8> [[A]]
+;
+entry:
+ %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.abs.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %b)
+ ret <vscale x 16 x i8> %0
+}
+
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.abs.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, <vscale x 16 x i8>) #1
+
+
+define dso_local <vscale x 8 x half> @test_svabs_z(<vscale x 8 x half> %b) #0 {
+; CHECK-LABEL: define dso_local <vscale x 8 x half> @test_svabs_z(
+; CHECK-SAME: <vscale x 8 x half> [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret <vscale x 8 x half> zeroinitializer
+;
+entry:
+ %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.fabs.nxv8f16(<vscale x 8 x half> zeroinitializer, <vscale x 8 x i1> zeroinitializer, <vscale x 8 x half> %b)
+ ret <vscale x 8 x half> %0
+}
+
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fabs.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, <vscale x 8 x half>) #1
+
+
+define dso_local <vscale x 16 x i8> @test_svabs_m2(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svabs_m2(
+; CHECK-SAME: <vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i1...
[truncated]
``````````
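Beyond replacing with a passthru value or zero, the patch also folds SVE reductions over an all-false predicate to an identity constant: QNaN for fmaxnmv/fminnmv, negative/positive infinity for fmaxv/fminv, and signed/unsigned extreme values for the integer min/max reductions. A minimal, hypothetical IR sketch of that fold (the intrinsic declaration and function name here are assumed, not copied from the patch's tests):

```llvm
; Sketch only: with an all-false predicate no element takes part in the
; reduction, so the call folds to the identity constant chosen in the diff.
declare float @llvm.aarch64.sve.fmaxnmv.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>)

define float @fmaxnmv_no_active_lanes(<vscale x 4 x float> %v) {
  %r = tail call float @llvm.aarch64.sve.fmaxnmv.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %v)
  ; Expected after this patch: the call is replaced by a quiet-NaN constant.
  ret float %r
}
```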
https://github.com/llvm/llvm-project/pull/86651
More information about the llvm-commits
mailing list