[llvm] 1a2e901 - [SVE][CodeGen] Add patterns for ADD/SUB + element count
Kerry McLaughlin via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 13 03:47:14 PDT 2021
Author: Kerry McLaughlin
Date: 2021-10-13T11:36:15+01:00
New Revision: 1a2e90199f83e7e3a7267b8fa9715d87ed5b2f88
URL: https://github.com/llvm/llvm-project/commit/1a2e90199f83e7e3a7267b8fa9715d87ed5b2f88
DIFF: https://github.com/llvm/llvm-project/commit/1a2e90199f83e7e3a7267b8fa9715d87ed5b2f88.diff
LOG: [SVE][CodeGen] Add patterns for ADD/SUB + element count
This patch adds patterns to match the following with INC/DEC:
- @llvm.aarch64.sve.cnt[b|h|w|d] intrinsics + ADD/SUB
- vscale + ADD/SUB
On some SVE implementations INC/DEC VL is not as cheap as ADD/SUB, so this behaviour is
guarded by the "use-scalar-inc-vl" feature flag, which is off by default for SVE. There
are no known issues with SVE2, so the feature is enabled by default when targeting SVE2.
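For illustration, a minimal sketch of both cases, mirroring the tests added in this patch
(the function names below are illustrative only, not part of the change). With
"+sve,+use-scalar-inc-vl" (or "+sve2"), each of these is now selected to a single
"inch x0" instead of a cnth of the element count followed by a scalar add:

  ; Intrinsic form: @llvm.aarch64.sve.cnth (pattern 31 = "all") added to a scalar.
  define i64 @cnth_plus_add(i64 %a) {
    %cnt = call i64 @llvm.aarch64.sve.cnth(i32 31)
    %out = add i64 %a, %cnt
    ret i64 %out
  }

  ; vscale form: 8 * vscale (the halfword element count) added to a scalar.
  define i64 @vscale_plus_add(i64 %a) {
    %vscale = call i64 @llvm.vscale.i64()
    %mul = mul i64 %vscale, 8
    %out = add i64 %a, %mul
    ret i64 %out
  }

  declare i64 @llvm.aarch64.sve.cnth(i32)
  declare i64 @llvm.vscale.i64()

Without the feature, both continue to lower to a cnth + add pair, as shown by the
NO_SCALAR_INC and default-SVE check lines in the tests below.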
Reviewed By: david-arm
Differential Revision: https://reviews.llvm.org/D111441
Added:
llvm/test/CodeGen/AArch64/sve-vl-arith.ll
Modified:
llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
llvm/lib/Target/AArch64/AArch64.td
llvm/lib/Target/AArch64/AArch64InstrInfo.td
llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
llvm/lib/Target/AArch64/AArch64Subtarget.h
llvm/lib/Target/AArch64/SVEInstrFormats.td
llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll
llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll
llvm/test/CodeGen/AArch64/sve-gep.ll
llvm/test/CodeGen/AArch64/sve-insert-element.ll
llvm/test/CodeGen/AArch64/sve-insert-vector.ll
llvm/test/CodeGen/AArch64/sve-intrinsics-counting-elems.ll
llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll
llvm/test/CodeGen/AArch64/sve-split-insert-elt.ll
llvm/test/CodeGen/AArch64/sve-stepvector.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 34286f3388251..0a5dce34cd3ab 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5034,6 +5034,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
}
if (OpOpcode == ISD::UNDEF)
return getUNDEF(VT);
+ if (OpOpcode == ISD::VSCALE && !NewNodesMustHaveLegalTypes)
+ return getVScale(DL, VT, Operand.getConstantOperandAPInt(0));
break;
case ISD::ANY_EXTEND_VECTOR_INREG:
case ISD::ZERO_EXTEND_VECTOR_INREG:
diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td
index 13cbda48d328a..4ea1bf2889ba6 100644
--- a/llvm/lib/Target/AArch64/AArch64.td
+++ b/llvm/lib/Target/AArch64/AArch64.td
@@ -129,8 +129,12 @@ def FeatureExperimentalZeroingPseudos
"merged with destructive operations",
[]>;
+def FeatureUseScalarIncVL : SubtargetFeature<"use-scalar-inc-vl",
+ "UseScalarIncVL", "true", "Prefer inc/dec over add+cnt">;
+
def FeatureSVE2 : SubtargetFeature<"sve2", "HasSVE2", "true",
- "Enable Scalable Vector Extension 2 (SVE2) instructions", [FeatureSVE]>;
+ "Enable Scalable Vector Extension 2 (SVE2) instructions",
+ [FeatureSVE, FeatureUseScalarIncVL]>;
def FeatureSVE2AES : SubtargetFeature<"sve2-aes", "HasSVE2AES", "true",
"Enable AES SVE2 instructions", [FeatureSVE2, FeatureAES]>;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index db3728abb97e8..3f20653ba49c4 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -201,6 +201,8 @@ def UseNegativeImmediates
: Predicate<"false">, AssemblerPredicate<(all_of (not FeatureNoNegativeImmediates)),
"NegativeImmediates">;
+def UseScalarIncVL : Predicate<"Subtarget->useScalarIncVL()">;
+
def AArch64LocalRecover : SDNode<"ISD::LOCAL_RECOVER",
SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
SDTCisInt<1>]>>;
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 57c97e03dcc5d..7bd891a2acdc5 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1452,16 +1452,18 @@ let Predicates = [HasSVEorStreamingSVE] in {
defm CNTW_XPiI : sve_int_count<0b100, "cntw", int_aarch64_sve_cntw>;
defm CNTD_XPiI : sve_int_count<0b110, "cntd", int_aarch64_sve_cntd>;
defm CNTP_XPP : sve_int_pcount_pred<0b0000, "cntp", int_aarch64_sve_cntp>;
+}
- defm INCB_XPiI : sve_int_pred_pattern_a<0b000, "incb">;
- defm DECB_XPiI : sve_int_pred_pattern_a<0b001, "decb">;
- defm INCH_XPiI : sve_int_pred_pattern_a<0b010, "inch">;
- defm DECH_XPiI : sve_int_pred_pattern_a<0b011, "dech">;
- defm INCW_XPiI : sve_int_pred_pattern_a<0b100, "incw">;
- defm DECW_XPiI : sve_int_pred_pattern_a<0b101, "decw">;
- defm INCD_XPiI : sve_int_pred_pattern_a<0b110, "incd">;
- defm DECD_XPiI : sve_int_pred_pattern_a<0b111, "decd">;
+ defm INCB_XPiI : sve_int_pred_pattern_a<0b000, "incb", add, int_aarch64_sve_cntb>;
+ defm DECB_XPiI : sve_int_pred_pattern_a<0b001, "decb", sub, int_aarch64_sve_cntb>;
+ defm INCH_XPiI : sve_int_pred_pattern_a<0b010, "inch", add, int_aarch64_sve_cnth>;
+ defm DECH_XPiI : sve_int_pred_pattern_a<0b011, "dech", sub, int_aarch64_sve_cnth>;
+ defm INCW_XPiI : sve_int_pred_pattern_a<0b100, "incw", add, int_aarch64_sve_cntw>;
+ defm DECW_XPiI : sve_int_pred_pattern_a<0b101, "decw", sub, int_aarch64_sve_cntw>;
+ defm INCD_XPiI : sve_int_pred_pattern_a<0b110, "incd", add, int_aarch64_sve_cntd>;
+ defm DECD_XPiI : sve_int_pred_pattern_a<0b111, "decd", sub, int_aarch64_sve_cntd>;
+let Predicates = [HasSVEorStreamingSVE] in {
defm SQINCB_XPiWdI : sve_int_pred_pattern_b_s32<0b00000, "sqincb", int_aarch64_sve_sqincb_n32>;
defm UQINCB_WPiI : sve_int_pred_pattern_b_u32<0b00001, "uqincb", int_aarch64_sve_uqincb_n32>;
defm SQDECB_XPiWdI : sve_int_pred_pattern_b_s32<0b00010, "sqdecb", int_aarch64_sve_sqdecb_n32>;
@@ -1893,6 +1895,72 @@ let Predicates = [HasSVEorStreamingSVE] in {
def : Pat<(vscale (sve_cntd_imm_neg i32:$imm)), (SUBXrs XZR, (CNTD_XPiI 31, $imm), 0)>;
}
+ let AddedComplexity = 5 in {
+ def : Pat<(add GPR64:$op, (vscale (sve_rdvl_imm i32:$imm))),
+ (ADDVL_XXI GPR64:$op, $imm)>;
+
+ def : Pat<(add GPR32:$op, (i32 (trunc (vscale (sve_rdvl_imm i32:$imm))))),
+ (i32 (EXTRACT_SUBREG (ADDVL_XXI (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ GPR32:$op, sub_32), $imm),
+ sub_32))>;
+
+ def : Pat<(nxv8i16 (add ZPR:$op, (nxv8i16 (AArch64dup (i32 (trunc (vscale (sve_cnth_imm i32:$imm)))))))),
+ (INCH_ZPiI ZPR:$op, 31, $imm)>;
+ def : Pat<(nxv4i32 (add ZPR:$op, (nxv4i32 (AArch64dup (i32 (trunc (vscale (sve_cntw_imm i32:$imm)))))))),
+ (INCW_ZPiI ZPR:$op, 31, $imm)>;
+ def : Pat<(nxv2i64 (add ZPR:$op, (nxv2i64 (AArch64dup (i64 (vscale (sve_cntd_imm i32:$imm))))))),
+ (INCD_ZPiI ZPR:$op, 31, $imm)>;
+
+ def : Pat<(nxv8i16 (sub ZPR:$op, (nxv8i16 (AArch64dup (i32 (trunc (vscale (sve_cnth_imm i32:$imm)))))))),
+ (DECH_ZPiI ZPR:$op, 31, $imm)>;
+ def : Pat<(nxv4i32 (sub ZPR:$op, (nxv4i32 (AArch64dup (i32 (trunc (vscale (sve_cntw_imm i32:$imm)))))))),
+ (DECW_ZPiI ZPR:$op, 31, $imm)>;
+ def : Pat<(nxv2i64 (sub ZPR:$op, (nxv2i64 (AArch64dup (i64 (vscale (sve_cntd_imm i32:$imm))))))),
+ (DECD_ZPiI ZPR:$op, 31, $imm)>;
+ }
+
+ let Predicates = [HasSVE, UseScalarIncVL], AddedComplexity = 5 in {
+ def : Pat<(add GPR64:$op, (vscale (sve_cnth_imm i32:$imm))),
+ (INCH_XPiI GPR64:$op, 31, $imm)>;
+ def : Pat<(add GPR64:$op, (vscale (sve_cntw_imm i32:$imm))),
+ (INCW_XPiI GPR64:$op, 31, $imm)>;
+ def : Pat<(add GPR64:$op, (vscale (sve_cntd_imm i32:$imm))),
+ (INCD_XPiI GPR64:$op, 31, $imm)>;
+
+ def : Pat<(add GPR64:$op, (vscale (sve_cnth_imm_neg i32:$imm))),
+ (DECH_XPiI GPR64:$op, 31, $imm)>;
+ def : Pat<(add GPR64:$op, (vscale (sve_cntw_imm_neg i32:$imm))),
+ (DECW_XPiI GPR64:$op, 31, $imm)>;
+ def : Pat<(add GPR64:$op, (vscale (sve_cntd_imm_neg i32:$imm))),
+ (DECD_XPiI GPR64:$op, 31, $imm)>;
+
+ def : Pat<(add GPR32:$op, (i32 (trunc (vscale (sve_cnth_imm i32:$imm))))),
+ (i32 (EXTRACT_SUBREG (INCH_XPiI (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ GPR32:$op, sub_32), 31, $imm),
+ sub_32))>;
+ def : Pat<(add GPR32:$op, (i32 (trunc (vscale (sve_cntw_imm i32:$imm))))),
+ (i32 (EXTRACT_SUBREG (INCW_XPiI (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ GPR32:$op, sub_32), 31, $imm),
+ sub_32))>;
+ def : Pat<(add GPR32:$op, (i32 (trunc (vscale (sve_cntd_imm i32:$imm))))),
+ (i32 (EXTRACT_SUBREG (INCD_XPiI (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ GPR32:$op, sub_32), 31, $imm),
+ sub_32))>;
+
+ def : Pat<(add GPR32:$op, (i32 (trunc (vscale (sve_cnth_imm_neg i32:$imm))))),
+ (i32 (EXTRACT_SUBREG (DECH_XPiI (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ GPR32:$op, sub_32), 31, $imm),
+ sub_32))>;
+ def : Pat<(add GPR32:$op, (i32 (trunc (vscale (sve_cntw_imm_neg i32:$imm))))),
+ (i32 (EXTRACT_SUBREG (DECW_XPiI (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ GPR32:$op, sub_32), 31, $imm),
+ sub_32))>;
+ def : Pat<(add GPR32:$op, (i32 (trunc (vscale (sve_cntd_imm_neg i32:$imm))))),
+ (i32 (EXTRACT_SUBREG (DECD_XPiI (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ GPR32:$op, sub_32), 31, $imm),
+ sub_32))>;
+ }
+
def : Pat<(add GPR64:$op, (vscale (sve_rdvl_imm i32:$imm))),
(ADDVL_XXI GPR64:$op, $imm)>;
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
index dae0ebd1041c7..cfb0f689d05ee 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -123,6 +123,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
// SVE extensions
bool HasSVE = false;
bool UseExperimentalZeroingPseudos = false;
+ bool UseScalarIncVL = false;
// Armv8.2 Crypto extensions
bool HasSM4 = false;
@@ -457,6 +458,8 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
return UseExperimentalZeroingPseudos;
}
+ bool useScalarIncVL() const { return UseScalarIncVL; }
+
/// CPU has TBI (top byte of addresses is ignored during HW address
/// translation) and OS enables it.
bool supportsAddressTopByteIgnored() const;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index f1d36d589daab..490e08a89471c 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -920,13 +920,43 @@ class sve_int_pred_pattern_a<bits<3> opc, string asm>
let Constraints = "$Rdn = $_Rdn";
}
-multiclass sve_int_pred_pattern_a<bits<3> opc, string asm> {
- def NAME : sve_int_pred_pattern_a<opc, asm>;
+multiclass sve_int_pred_pattern_a<bits<3> opc, string asm,
+ SDPatternOperator op,
+ SDPatternOperator opcnt> {
+ let Predicates = [HasSVEorStreamingSVE] in {
+ def NAME : sve_int_pred_pattern_a<opc, asm>;
+
+ def : InstAlias<asm # "\t$Rdn, $pattern",
+ (!cast<Instruction>(NAME) GPR64:$Rdn, sve_pred_enum:$pattern, 1), 1>;
+ def : InstAlias<asm # "\t$Rdn",
+ (!cast<Instruction>(NAME) GPR64:$Rdn, 0b11111, 1), 2>;
+ }
- def : InstAlias<asm # "\t$Rdn, $pattern",
- (!cast<Instruction>(NAME) GPR64:$Rdn, sve_pred_enum:$pattern, 1), 1>;
- def : InstAlias<asm # "\t$Rdn",
- (!cast<Instruction>(NAME) GPR64:$Rdn, 0b11111, 1), 2>;
+ let Predicates = [HasSVEorStreamingSVE, UseScalarIncVL] in {
+ def : Pat<(i64 (op GPR64:$Rdn, (opcnt sve_pred_enum:$pattern))),
+ (!cast<Instruction>(NAME) GPR64:$Rdn, sve_pred_enum:$pattern, 1)>;
+
+ def : Pat<(i64 (op GPR64:$Rdn, (mul (opcnt sve_pred_enum:$pattern), (sve_cnt_mul_imm i32:$imm)))),
+ (!cast<Instruction>(NAME) GPR64:$Rdn, sve_pred_enum:$pattern, $imm)>;
+
+ def : Pat<(i64 (op GPR64:$Rdn, (shl (opcnt sve_pred_enum:$pattern), (i64 (sve_cnt_shl_imm i32:$imm))))),
+ (!cast<Instruction>(NAME) GPR64:$Rdn, sve_pred_enum:$pattern, $imm)>;
+
+ def : Pat<(i32 (op GPR32:$Rdn, (i32 (trunc (opcnt (sve_pred_enum:$pattern)))))),
+ (i32 (EXTRACT_SUBREG (!cast<Instruction>(NAME) (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ GPR32:$Rdn, sub_32), sve_pred_enum:$pattern, 1),
+ sub_32))>;
+
+ def : Pat<(i32 (op GPR32:$Rdn, (mul (i32 (trunc (opcnt (sve_pred_enum:$pattern)))), (sve_cnt_mul_imm i32:$imm)))),
+ (i32 (EXTRACT_SUBREG (!cast<Instruction>(NAME) (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ GPR32:$Rdn, sub_32), sve_pred_enum:$pattern, $imm),
+ sub_32))>;
+
+ def : Pat<(i32 (op GPR32:$Rdn, (shl (i32 (trunc (opcnt (sve_pred_enum:$pattern)))), (i64 (sve_cnt_shl_imm i32:$imm))))),
+ (i32 (EXTRACT_SUBREG (!cast<Instruction>(NAME) (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ GPR32:$Rdn, sub_32), sve_pred_enum:$pattern, $imm),
+ sub_32))>;
+ }
}
class sve_int_pred_pattern_b<bits<5> opc, string asm, RegisterOperand dt,
diff --git a/llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll b/llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll
index b8540d51eb044..0077ea3b7ff27 100644
--- a/llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll
+++ b/llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll
@@ -39,15 +39,15 @@ define <vscale x 16 x i8> @splice_nxv16i8_clamped_idx(<vscale x 16 x i8> %a, <vs
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-2
-; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: mov x8, #-1
; CHECK-NEXT: mov x9, sp
-; CHECK-NEXT: sub x8, x8, #1
; CHECK-NEXT: mov w10, #256
-; CHECK-NEXT: cmp x8, #256
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: csel x8, x8, x10, lo
; CHECK-NEXT: st1b { z0.b }, p0, [sp]
+; CHECK-NEXT: addvl x8, x8, #1
; CHECK-NEXT: st1b { z1.b }, p0, [x9, #1, mul vl]
+; CHECK-NEXT: cmp x8, #256
+; CHECK-NEXT: csel x8, x8, x10, lo
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x9, x8]
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -622,23 +622,23 @@ define <vscale x 16 x float> @splice_nxv16f32_clamped_idx(<vscale x 16 x float>
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-8
-; CHECK-NEXT: rdvl x9, #1
+; CHECK-NEXT: mov x8, #-1
; CHECK-NEXT: mov w10, #16
-; CHECK-NEXT: sub x9, x9, #1
-; CHECK-NEXT: mov x8, sp
-; CHECK-NEXT: cmp x9, #16
+; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: csel x9, x9, x10, lo
-; CHECK-NEXT: st1w { z3.s }, p0, [x8, #3, mul vl]
-; CHECK-NEXT: st1w { z2.s }, p0, [x8, #2, mul vl]
-; CHECK-NEXT: add x10, x8, x9, lsl #2
-; CHECK-NEXT: st1w { z1.s }, p0, [x8, #1, mul vl]
+; CHECK-NEXT: addvl x8, x8, #1
+; CHECK-NEXT: cmp x8, #16
+; CHECK-NEXT: st1w { z3.s }, p0, [x9, #3, mul vl]
+; CHECK-NEXT: csel x8, x8, x10, lo
+; CHECK-NEXT: st1w { z2.s }, p0, [x9, #2, mul vl]
+; CHECK-NEXT: st1w { z1.s }, p0, [x9, #1, mul vl]
; CHECK-NEXT: st1w { z0.s }, p0, [sp]
-; CHECK-NEXT: st1w { z7.s }, p0, [x8, #7, mul vl]
-; CHECK-NEXT: st1w { z4.s }, p0, [x8, #4, mul vl]
-; CHECK-NEXT: st1w { z5.s }, p0, [x8, #5, mul vl]
-; CHECK-NEXT: st1w { z6.s }, p0, [x8, #6, mul vl]
-; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8, x9, lsl #2]
+; CHECK-NEXT: add x10, x9, x8, lsl #2
+; CHECK-NEXT: st1w { z7.s }, p0, [x9, #7, mul vl]
+; CHECK-NEXT: st1w { z4.s }, p0, [x9, #4, mul vl]
+; CHECK-NEXT: st1w { z5.s }, p0, [x9, #5, mul vl]
+; CHECK-NEXT: st1w { z6.s }, p0, [x9, #6, mul vl]
+; CHECK-NEXT: ld1w { z0.s }, p0/z, [x9, x8, lsl #2]
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x10, #1, mul vl]
; CHECK-NEXT: ld1w { z2.s }, p0/z, [x10, #2, mul vl]
; CHECK-NEXT: ld1w { z3.s }, p0/z, [x10, #3, mul vl]
diff --git a/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll b/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll
index ff8be096cc410..066c7f8002c1e 100644
--- a/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll
@@ -229,12 +229,12 @@ define <16 x i8> @extract_v16i8_nxv16i8_idx16(<vscale x 16 x i8> %vec) nounwind
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
-; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: mov x8, #-16
; CHECK-NEXT: mov w9, #16
-; CHECK-NEXT: sub x8, x8, #16
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: cmp x8, #16
; CHECK-NEXT: st1b { z0.b }, p0, [sp]
+; CHECK-NEXT: addvl x8, x8, #1
+; CHECK-NEXT: cmp x8, #16
; CHECK-NEXT: csel x8, x8, x9, lo
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: ldr q0, [x9, x8]
diff --git a/llvm/test/CodeGen/AArch64/sve-gep.ll b/llvm/test/CodeGen/AArch64/sve-gep.ll
index a1b64741d5a51..d2445aff408be 100644
--- a/llvm/test/CodeGen/AArch64/sve-gep.ll
+++ b/llvm/test/CodeGen/AArch64/sve-gep.ll
@@ -202,10 +202,8 @@ define <vscale x 2 x i64*> @scalable_of_fixed_5_i64(i64* %base, <vscale x 2 x i3
define <vscale x 2 x <vscale x 2 x i64>*> @scalable_of_scalable_1(<vscale x 2 x i64>* %base) {
; CHECK-LABEL: scalable_of_scalable_1:
; CHECK: // %bb.0:
-; CHECK-NEXT: rdvl x8, #1
-; CHECK-NEXT: mov z1.d, x0
-; CHECK-NEXT: mov z0.d, x8
-; CHECK-NEXT: add z0.d, z1.d, z0.d
+; CHECK-NEXT: mov z0.d, x0
+; CHECK-NEXT: incd z0.d, all, mul #8
; CHECK-NEXT: ret
%idx = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 1, i32 0), <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i32> zeroinitializer
%d = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %base, <vscale x 2 x i64> %idx
@@ -215,9 +213,7 @@ define <vscale x 2 x <vscale x 2 x i64>*> @scalable_of_scalable_1(<vscale x 2 x
define <vscale x 2 x <vscale x 2 x i64>*> @scalable_of_scalable_2(<vscale x 2 x <vscale x 2 x i64>*> %base) {
; CHECK-LABEL: scalable_of_scalable_2:
; CHECK: // %bb.0:
-; CHECK-NEXT: rdvl x8, #1
-; CHECK-NEXT: mov z1.d, x8
-; CHECK-NEXT: add z0.d, z0.d, z1.d
+; CHECK-NEXT: incd z0.d, all, mul #8
; CHECK-NEXT: ret
%idx = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 1, i32 0), <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i32> zeroinitializer
%d = getelementptr <vscale x 2 x i64>, <vscale x 2 x <vscale x 2 x i64>*> %base, <vscale x 2 x i64> %idx
diff --git a/llvm/test/CodeGen/AArch64/sve-insert-element.ll b/llvm/test/CodeGen/AArch64/sve-insert-element.ll
index aa543b1b46855..95ecf2582f762 100644
--- a/llvm/test/CodeGen/AArch64/sve-insert-element.ll
+++ b/llvm/test/CodeGen/AArch64/sve-insert-element.ll
@@ -503,16 +503,16 @@ define <vscale x 32 x i1> @test_predicate_insert_32xi1(<vscale x 32 x i1> %val,
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
; CHECK-NEXT: .cfi_offset w29, -16
-; CHECK-NEXT: rdvl x8, #2
+; CHECK-NEXT: mov x8, #-1
; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-NEXT: sxtw x9, w1
-; CHECK-NEXT: sub x8, x8, #1
; CHECK-NEXT: mov x10, sp
-; CHECK-NEXT: cmp x9, x8
; CHECK-NEXT: mov z0.b, p1/z, #1 // =0x1
-; CHECK-NEXT: csel x8, x9, x8, lo
; CHECK-NEXT: ptrue p1.b
+; CHECK-NEXT: addvl x8, x8, #2
+; CHECK-NEXT: cmp x9, x8
; CHECK-NEXT: st1b { z0.b }, p1, [x10, #1, mul vl]
+; CHECK-NEXT: csel x8, x9, x8, lo
; CHECK-NEXT: mov z0.b, p0/z, #1 // =0x1
; CHECK-NEXT: st1b { z0.b }, p1, [sp]
; CHECK-NEXT: strb w0, [x10, x8]
diff --git a/llvm/test/CodeGen/AArch64/sve-insert-vector.ll b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll
index 9839ee51b77d2..49b3e85328552 100644
--- a/llvm/test/CodeGen/AArch64/sve-insert-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll
@@ -139,12 +139,12 @@ define <vscale x 16 x i8> @insert_v16i8_nxv16i8_idx16(<vscale x 16 x i8> %vec, <
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
-; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: mov x8, #-16
; CHECK-NEXT: mov w9, #16
-; CHECK-NEXT: sub x8, x8, #16
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: cmp x8, #16
; CHECK-NEXT: st1b { z0.b }, p0, [sp]
+; CHECK-NEXT: addvl x8, x8, #1
+; CHECK-NEXT: cmp x8, #16
; CHECK-NEXT: csel x8, x8, x9, lo
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: str q1, [x9, x8]
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-counting-elems.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-counting-elems.ll
index b37e3d8b8c82e..85ecb33151141 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-counting-elems.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-counting-elems.ll
@@ -1,4 +1,6 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+use-scalar-inc-vl < %s | FileCheck %s -check-prefix=USE_SCALAR_INC
;
; CNTB
@@ -6,16 +8,28 @@
define i64 @cntb() {
; CHECK-LABEL: cntb:
-; CHECK: cntb x0, vl2
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: cntb x0, vl2
+; CHECK-NEXT: ret
+;
+; USE_SCALAR_INC-LABEL: cntb:
+; USE_SCALAR_INC: // %bb.0:
+; USE_SCALAR_INC-NEXT: cntb x0, vl2
+; USE_SCALAR_INC-NEXT: ret
%out = call i64 @llvm.aarch64.sve.cntb(i32 2)
ret i64 %out
}
define i64 @cntb_mul3() {
; CHECK-LABEL: cntb_mul3:
-; CHECK: cntb x0, vl6, mul #3
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: cntb x0, vl6, mul #3
+; CHECK-NEXT: ret
+;
+; USE_SCALAR_INC-LABEL: cntb_mul3:
+; USE_SCALAR_INC: // %bb.0:
+; USE_SCALAR_INC-NEXT: cntb x0, vl6, mul #3
+; USE_SCALAR_INC-NEXT: ret
%cnt = call i64 @llvm.aarch64.sve.cntb(i32 6)
%out = mul i64 %cnt, 3
ret i64 %out
@@ -23,8 +37,14 @@ define i64 @cntb_mul3() {
define i64 @cntb_mul4() {
; CHECK-LABEL: cntb_mul4:
-; CHECK: cntb x0, vl8, mul #4
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: cntb x0, vl8, mul #4
+; CHECK-NEXT: ret
+;
+; USE_SCALAR_INC-LABEL: cntb_mul4:
+; USE_SCALAR_INC: // %bb.0:
+; USE_SCALAR_INC-NEXT: cntb x0, vl8, mul #4
+; USE_SCALAR_INC-NEXT: ret
%cnt = call i64 @llvm.aarch64.sve.cntb(i32 8)
%out = mul i64 %cnt, 4
ret i64 %out
@@ -36,16 +56,28 @@ define i64 @cntb_mul4() {
define i64 @cnth() {
; CHECK-LABEL: cnth:
-; CHECK: cnth x0, vl3
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: cnth x0, vl3
+; CHECK-NEXT: ret
+;
+; USE_SCALAR_INC-LABEL: cnth:
+; USE_SCALAR_INC: // %bb.0:
+; USE_SCALAR_INC-NEXT: cnth x0, vl3
+; USE_SCALAR_INC-NEXT: ret
%out = call i64 @llvm.aarch64.sve.cnth(i32 3)
ret i64 %out
}
define i64 @cnth_mul5() {
; CHECK-LABEL: cnth_mul5:
-; CHECK: cnth x0, vl7, mul #5
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: cnth x0, vl7, mul #5
+; CHECK-NEXT: ret
+;
+; USE_SCALAR_INC-LABEL: cnth_mul5:
+; USE_SCALAR_INC: // %bb.0:
+; USE_SCALAR_INC-NEXT: cnth x0, vl7, mul #5
+; USE_SCALAR_INC-NEXT: ret
%cnt = call i64 @llvm.aarch64.sve.cnth(i32 7)
%out = mul i64 %cnt, 5
ret i64 %out
@@ -53,8 +85,14 @@ define i64 @cnth_mul5() {
define i64 @cnth_mul8() {
; CHECK-LABEL: cnth_mul8:
-; CHECK: cnth x0, vl5, mul #8
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: cnth x0, vl5, mul #8
+; CHECK-NEXT: ret
+;
+; USE_SCALAR_INC-LABEL: cnth_mul8:
+; USE_SCALAR_INC: // %bb.0:
+; USE_SCALAR_INC-NEXT: cnth x0, vl5, mul #8
+; USE_SCALAR_INC-NEXT: ret
%cnt = call i64 @llvm.aarch64.sve.cnth(i32 5)
%out = mul i64 %cnt, 8
ret i64 %out
@@ -66,16 +104,28 @@ define i64 @cnth_mul8() {
define i64 @cntw() {
; CHECK-LABEL: cntw:
-; CHECK: cntw x0, vl4
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: cntw x0, vl4
+; CHECK-NEXT: ret
+;
+; USE_SCALAR_INC-LABEL: cntw:
+; USE_SCALAR_INC: // %bb.0:
+; USE_SCALAR_INC-NEXT: cntw x0, vl4
+; USE_SCALAR_INC-NEXT: ret
%out = call i64 @llvm.aarch64.sve.cntw(i32 4)
ret i64 %out
}
define i64 @cntw_mul11() {
; CHECK-LABEL: cntw_mul11:
-; CHECK: cntw x0, vl8, mul #11
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: cntw x0, vl8, mul #11
+; CHECK-NEXT: ret
+;
+; USE_SCALAR_INC-LABEL: cntw_mul11:
+; USE_SCALAR_INC: // %bb.0:
+; USE_SCALAR_INC-NEXT: cntw x0, vl8, mul #11
+; USE_SCALAR_INC-NEXT: ret
%cnt = call i64 @llvm.aarch64.sve.cntw(i32 8)
%out = mul i64 %cnt, 11
ret i64 %out
@@ -83,8 +133,14 @@ define i64 @cntw_mul11() {
define i64 @cntw_mul2() {
; CHECK-LABEL: cntw_mul2:
-; CHECK: cntw x0, vl6, mul #2
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: cntw x0, vl6, mul #2
+; CHECK-NEXT: ret
+;
+; USE_SCALAR_INC-LABEL: cntw_mul2:
+; USE_SCALAR_INC: // %bb.0:
+; USE_SCALAR_INC-NEXT: cntw x0, vl6, mul #2
+; USE_SCALAR_INC-NEXT: ret
%cnt = call i64 @llvm.aarch64.sve.cntw(i32 6)
%out = mul i64 %cnt, 2
ret i64 %out
@@ -96,16 +152,28 @@ define i64 @cntw_mul2() {
define i64 @cntd() {
; CHECK-LABEL: cntd:
-; CHECK: cntd x0, vl5
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: cntd x0, vl5
+; CHECK-NEXT: ret
+;
+; USE_SCALAR_INC-LABEL: cntd:
+; USE_SCALAR_INC: // %bb.0:
+; USE_SCALAR_INC-NEXT: cntd x0, vl5
+; USE_SCALAR_INC-NEXT: ret
%out = call i64 @llvm.aarch64.sve.cntd(i32 5)
ret i64 %out
}
define i64 @cntd_mul15() {
; CHECK-LABEL: cntd_mul15:
-; CHECK: cntd x0, vl16, mul #15
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: cntd x0, vl16, mul #15
+; CHECK-NEXT: ret
+;
+; USE_SCALAR_INC-LABEL: cntd_mul15:
+; USE_SCALAR_INC: // %bb.0:
+; USE_SCALAR_INC-NEXT: cntd x0, vl16, mul #15
+; USE_SCALAR_INC-NEXT: ret
%cnt = call i64 @llvm.aarch64.sve.cntd(i32 9)
%out = mul i64 %cnt, 15
ret i64 %out
@@ -113,8 +181,14 @@ define i64 @cntd_mul15() {
define i64 @cntd_mul16() {
; CHECK-LABEL: cntd_mul16:
-; CHECK: cntd x0, vl32, mul #16
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: cntd x0, vl32, mul #16
+; CHECK-NEXT: ret
+;
+; USE_SCALAR_INC-LABEL: cntd_mul16:
+; USE_SCALAR_INC: // %bb.0:
+; USE_SCALAR_INC-NEXT: cntd x0, vl32, mul #16
+; USE_SCALAR_INC-NEXT: ret
%cnt = call i64 @llvm.aarch64.sve.cntd(i32 10)
%out = mul i64 %cnt, 16
ret i64 %out
@@ -126,8 +200,14 @@ define i64 @cntd_mul16() {
define i64 @cntp_b8(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a) {
; CHECK-LABEL: cntp_b8:
-; CHECK: cntp x0, p0, p1.b
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: cntp x0, p0, p1.b
+; CHECK-NEXT: ret
+;
+; USE_SCALAR_INC-LABEL: cntp_b8:
+; USE_SCALAR_INC: // %bb.0:
+; USE_SCALAR_INC-NEXT: cntp x0, p0, p1.b
+; USE_SCALAR_INC-NEXT: ret
%out = call i64 @llvm.aarch64.sve.cntp.nxv16i1(<vscale x 16 x i1> %pg,
<vscale x 16 x i1> %a)
ret i64 %out
@@ -135,8 +215,14 @@ define i64 @cntp_b8(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a) {
define i64 @cntp_b16(<vscale x 8 x i1> %pg, <vscale x 8 x i1> %a) {
; CHECK-LABEL: cntp_b16:
-; CHECK: cntp x0, p0, p1.h
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: cntp x0, p0, p1.h
+; CHECK-NEXT: ret
+;
+; USE_SCALAR_INC-LABEL: cntp_b16:
+; USE_SCALAR_INC: // %bb.0:
+; USE_SCALAR_INC-NEXT: cntp x0, p0, p1.h
+; USE_SCALAR_INC-NEXT: ret
%out = call i64 @llvm.aarch64.sve.cntp.nxv8i1(<vscale x 8 x i1> %pg,
<vscale x 8 x i1> %a)
ret i64 %out
@@ -144,8 +230,14 @@ define i64 @cntp_b16(<vscale x 8 x i1> %pg, <vscale x 8 x i1> %a) {
define i64 @cntp_b32(<vscale x 4 x i1> %pg, <vscale x 4 x i1> %a) {
; CHECK-LABEL: cntp_b32:
-; CHECK: cntp x0, p0, p1.s
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: cntp x0, p0, p1.s
+; CHECK-NEXT: ret
+;
+; USE_SCALAR_INC-LABEL: cntp_b32:
+; USE_SCALAR_INC: // %bb.0:
+; USE_SCALAR_INC-NEXT: cntp x0, p0, p1.s
+; USE_SCALAR_INC-NEXT: ret
%out = call i64 @llvm.aarch64.sve.cntp.nxv4i1(<vscale x 4 x i1> %pg,
<vscale x 4 x i1> %a)
ret i64 %out
@@ -153,13 +245,311 @@ define i64 @cntp_b32(<vscale x 4 x i1> %pg, <vscale x 4 x i1> %a) {
define i64 @cntp_b64(<vscale x 2 x i1> %pg, <vscale x 2 x i1> %a) {
; CHECK-LABEL: cntp_b64:
-; CHECK: cntp x0, p0, p1.d
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: cntp x0, p0, p1.d
+; CHECK-NEXT: ret
+;
+; USE_SCALAR_INC-LABEL: cntp_b64:
+; USE_SCALAR_INC: // %bb.0:
+; USE_SCALAR_INC-NEXT: cntp x0, p0, p1.d
+; USE_SCALAR_INC-NEXT: ret
%out = call i64 @llvm.aarch64.sve.cntp.nxv2i1(<vscale x 2 x i1> %pg,
<vscale x 2 x i1> %a)
ret i64 %out
}
+;
+; INCB
+;
+
+define i64 @incb(i64 %a) {
+; CHECK-LABEL: incb:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cntb x8, vl5
+; CHECK-NEXT: add x0, x8, x0
+; CHECK-NEXT: ret
+;
+; USE_SCALAR_INC-LABEL: incb:
+; USE_SCALAR_INC: // %bb.0:
+; USE_SCALAR_INC-NEXT: incb x0, vl5
+; USE_SCALAR_INC-NEXT: ret
+ %cnt = call i64 @llvm.aarch64.sve.cntb(i32 5)
+ %out = add i64 %cnt, %a
+ ret i64 %out
+}
+
+define i64 @incb_mul(i64 %a) {
+; CHECK-LABEL: incb_mul:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cntb x8, vl4
+; CHECK-NEXT: add x0, x0, x8, lsl #2
+; CHECK-NEXT: ret
+;
+; USE_SCALAR_INC-LABEL: incb_mul:
+; USE_SCALAR_INC: // %bb.0:
+; USE_SCALAR_INC-NEXT: incb x0, vl4, mul #4
+; USE_SCALAR_INC-NEXT: ret
+ %cnt = call i64 @llvm.aarch64.sve.cntb(i32 4)
+ %mul = mul i64 %cnt, 4
+ %out = add i64 %mul, %a
+ ret i64 %out
+}
+
+;
+; DECB
+;
+
+define i64 @decb(i64 %a) {
+; CHECK-LABEL: decb:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cntb x8, vl6
+; CHECK-NEXT: sub x0, x0, x8
+; CHECK-NEXT: ret
+;
+; USE_SCALAR_INC-LABEL: decb:
+; USE_SCALAR_INC: // %bb.0:
+; USE_SCALAR_INC-NEXT: decb x0, vl6
+; USE_SCALAR_INC-NEXT: ret
+ %cnt = call i64 @llvm.aarch64.sve.cntb(i32 6)
+ %out = sub i64 %a, %cnt
+ ret i64 %out
+}
+
+define i64 @decb_mul(i64 %a) {
+; CHECK-LABEL: decb_mul:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cntb x8, vl7
+; CHECK-NEXT: sub x0, x0, x8, lsl #3
+; CHECK-NEXT: ret
+;
+; USE_SCALAR_INC-LABEL: decb_mul:
+; USE_SCALAR_INC: // %bb.0:
+; USE_SCALAR_INC-NEXT: decb x0, vl7, mul #8
+; USE_SCALAR_INC-NEXT: ret
+ %cnt = call i64 @llvm.aarch64.sve.cntb(i32 7)
+ %mul = mul i64 %cnt, 8
+ %out = sub i64 %a, %mul
+ ret i64 %out
+}
+
+;
+; INCH
+;
+
+define i64 @inch(i64 %a) {
+; CHECK-LABEL: inch:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cnth x8, vl4
+; CHECK-NEXT: add x0, x8, x0
+; CHECK-NEXT: ret
+;
+; USE_SCALAR_INC-LABEL: inch:
+; USE_SCALAR_INC: // %bb.0:
+; USE_SCALAR_INC-NEXT: inch x0, vl4
+; USE_SCALAR_INC-NEXT: ret
+ %cnt = call i64 @llvm.aarch64.sve.cnth(i32 4)
+ %out = add i64 %cnt, %a
+ ret i64 %out
+}
+
+define i64 @inch_mul(i64 %a) {
+; CHECK-LABEL: inch_mul:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cnth x8, vl8, mul #5
+; CHECK-NEXT: add x0, x8, x0
+; CHECK-NEXT: ret
+;
+; USE_SCALAR_INC-LABEL: inch_mul:
+; USE_SCALAR_INC: // %bb.0:
+; USE_SCALAR_INC-NEXT: inch x0, vl8, mul #5
+; USE_SCALAR_INC-NEXT: ret
+ %cnt = call i64 @llvm.aarch64.sve.cnth(i32 8)
+ %mul = mul i64 %cnt, 5
+ %out = add i64 %mul, %a
+ ret i64 %out
+}
+
+;
+; DECH
+;
+
+define i64 @dech(i64 %a) {
+; CHECK-LABEL: dech:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cnth x8, vl1
+; CHECK-NEXT: sub x0, x0, x8
+; CHECK-NEXT: ret
+;
+; USE_SCALAR_INC-LABEL: dech:
+; USE_SCALAR_INC: // %bb.0:
+; USE_SCALAR_INC-NEXT: dech x0, vl1
+; USE_SCALAR_INC-NEXT: ret
+ %cnt = call i64 @llvm.aarch64.sve.cnth(i32 1)
+ %out = sub i64 %a, %cnt
+ ret i64 %out
+}
+
+define i64 @dech_mul(i64 %a) {
+; CHECK-LABEL: dech_mul:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cnth x8, vl16, mul #7
+; CHECK-NEXT: sub x0, x0, x8
+; CHECK-NEXT: ret
+;
+; USE_SCALAR_INC-LABEL: dech_mul:
+; USE_SCALAR_INC: // %bb.0:
+; USE_SCALAR_INC-NEXT: dech x0, vl16, mul #7
+; USE_SCALAR_INC-NEXT: ret
+ %cnt = call i64 @llvm.aarch64.sve.cnth(i32 9)
+ %mul = mul i64 %cnt, 7
+ %out = sub i64 %a, %mul
+ ret i64 %out
+}
+
+;
+; INCW
+;
+
+define i64 @incw(i64 %a) {
+; CHECK-LABEL: incw:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cntw x8, #16
+; CHECK-NEXT: add x0, x8, x0
+; CHECK-NEXT: ret
+;
+; USE_SCALAR_INC-LABEL: incw:
+; USE_SCALAR_INC: // %bb.0:
+; USE_SCALAR_INC-NEXT: incw x0, #16
+; USE_SCALAR_INC-NEXT: ret
+ %cnt = call i64 @llvm.aarch64.sve.cntw(i32 16)
+ %out = add i64 %cnt, %a
+ ret i64 %out
+}
+
+define i64 @incw_mul(i64 %a) {
+; CHECK-LABEL: incw_mul:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cntw x8, vl32, mul #12
+; CHECK-NEXT: add x0, x8, x0
+; CHECK-NEXT: ret
+;
+; USE_SCALAR_INC-LABEL: incw_mul:
+; USE_SCALAR_INC: // %bb.0:
+; USE_SCALAR_INC-NEXT: incw x0, vl32, mul #12
+; USE_SCALAR_INC-NEXT: ret
+ %cnt = call i64 @llvm.aarch64.sve.cntw(i32 10)
+ %mul = mul i64 %cnt, 12
+ %out = add i64 %mul, %a
+ ret i64 %out
+}
+
+;
+; DECW
+;
+
+define i64 @decw(i64 %a) {
+; CHECK-LABEL: decw:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cntw x8
+; CHECK-NEXT: sub x0, x0, x8
+; CHECK-NEXT: ret
+;
+; USE_SCALAR_INC-LABEL: decw:
+; USE_SCALAR_INC: // %bb.0:
+; USE_SCALAR_INC-NEXT: decw x0
+; USE_SCALAR_INC-NEXT: ret
+ %cnt = call i64 @llvm.aarch64.sve.cntw(i32 31)
+ %out = sub i64 %a, %cnt
+ ret i64 %out
+}
+
+define i64 @decw_mul(i64 %a) {
+; CHECK-LABEL: decw_mul:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cntw x8, vl128
+; CHECK-NEXT: sub x0, x0, x8, lsl #4
+; CHECK-NEXT: ret
+;
+; USE_SCALAR_INC-LABEL: decw_mul:
+; USE_SCALAR_INC: // %bb.0:
+; USE_SCALAR_INC-NEXT: decw x0, vl128, mul #16
+; USE_SCALAR_INC-NEXT: ret
+ %cnt = call i64 @llvm.aarch64.sve.cntw(i32 12)
+ %mul = mul i64 %cnt, 16
+ %out = sub i64 %a, %mul
+ ret i64 %out
+}
+
+define i64 @incd(i64 %a) {
+; CHECK-LABEL: incd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cntd x8, vl8
+; CHECK-NEXT: add x0, x8, x0
+; CHECK-NEXT: ret
+;
+; USE_SCALAR_INC-LABEL: incd:
+; USE_SCALAR_INC: // %bb.0:
+; USE_SCALAR_INC-NEXT: incd x0, vl8
+; USE_SCALAR_INC-NEXT: ret
+ %cnt = call i64 @llvm.aarch64.sve.cntd(i32 8)
+ %out = add i64 %cnt, %a
+ ret i64 %out
+}
+
+define i64 @incd_mul(i64 %a) {
+; CHECK-LABEL: incd_mul:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cntd x8, all, mul #15
+; CHECK-NEXT: add x0, x8, x0
+; CHECK-NEXT: ret
+;
+; USE_SCALAR_INC-LABEL: incd_mul:
+; USE_SCALAR_INC: // %bb.0:
+; USE_SCALAR_INC-NEXT: incd x0, all, mul #15
+; USE_SCALAR_INC-NEXT: ret
+ %cnt = call i64 @llvm.aarch64.sve.cntd(i32 31)
+ %mul = mul i64 %cnt, 15
+ %out = add i64 %mul, %a
+ ret i64 %out
+}
+
+;
+; DECD
+;
+
+define i64 @decd(i64 %a) {
+; CHECK-LABEL: decd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cntd x8, #16
+; CHECK-NEXT: sub x0, x0, x8
+; CHECK-NEXT: ret
+;
+; USE_SCALAR_INC-LABEL: decd:
+; USE_SCALAR_INC: // %bb.0:
+; USE_SCALAR_INC-NEXT: decd x0, #16
+; USE_SCALAR_INC-NEXT: ret
+ %cnt = call i64 @llvm.aarch64.sve.cntd(i32 16)
+ %out = sub i64 %a, %cnt
+ ret i64 %out
+}
+
+define i64 @decd_mul(i64 %a) {
+; CHECK-LABEL: decd_mul:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cntd x8, vl2, mul #9
+; CHECK-NEXT: sub x0, x0, x8
+; CHECK-NEXT: ret
+;
+; USE_SCALAR_INC-LABEL: decd_mul:
+; USE_SCALAR_INC: // %bb.0:
+; USE_SCALAR_INC-NEXT: decd x0, vl2, mul #9
+; USE_SCALAR_INC-NEXT: ret
+ %cnt = call i64 @llvm.aarch64.sve.cntd(i32 2)
+ %mul = mul i64 %cnt, 9
+ %out = sub i64 %a, %mul
+ ret i64 %out
+}
+
declare i64 @llvm.aarch64.sve.cntb(i32 %pattern)
declare i64 @llvm.aarch64.sve.cnth(i32 %pattern)
declare i64 @llvm.aarch64.sve.cntw(i32 %pattern)
diff --git a/llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll b/llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll
index 97ed67cc3d8bd..56d2ff25cb15e 100644
--- a/llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll
+++ b/llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll
@@ -23,15 +23,15 @@ define i8 @split_extract_32i8_idx(<vscale x 32 x i8> %a, i32 %idx) {
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
; CHECK-NEXT: .cfi_offset w29, -16
-; CHECK-NEXT: rdvl x8, #2
+; CHECK-NEXT: mov x8, #-1
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: sxtw x9, w0
-; CHECK-NEXT: sub x8, x8, #1
; CHECK-NEXT: mov x10, sp
-; CHECK-NEXT: cmp x9, x8
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: csel x8, x9, x8, lo
+; CHECK-NEXT: addvl x8, x8, #2
+; CHECK-NEXT: cmp x9, x8
; CHECK-NEXT: st1b { z1.b }, p0, [x10, #1, mul vl]
+; CHECK-NEXT: csel x8, x9, x8, lo
; CHECK-NEXT: st1b { z0.b }, p0, [sp]
; CHECK-NEXT: ldrb w0, [x10, x8]
; CHECK-NEXT: addvl sp, sp, #2
@@ -48,15 +48,15 @@ define i16 @split_extract_16i16_idx(<vscale x 16 x i16> %a, i32 %idx) {
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
; CHECK-NEXT: .cfi_offset w29, -16
-; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: mov x8, #-1
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: sxtw x9, w0
-; CHECK-NEXT: sub x8, x8, #1
; CHECK-NEXT: mov x10, sp
-; CHECK-NEXT: cmp x9, x8
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: csel x8, x9, x8, lo
+; CHECK-NEXT: addvl x8, x8, #1
+; CHECK-NEXT: cmp x9, x8
; CHECK-NEXT: st1h { z1.h }, p0, [x10, #1, mul vl]
+; CHECK-NEXT: csel x8, x9, x8, lo
; CHECK-NEXT: st1h { z0.h }, p0, [sp]
; CHECK-NEXT: ldrh w0, [x10, x8, lsl #1]
; CHECK-NEXT: addvl sp, sp, #2
@@ -145,15 +145,15 @@ define i16 @split_extract_16i16(<vscale x 16 x i16> %a) {
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
; CHECK-NEXT: .cfi_offset w29, -16
-; CHECK-NEXT: rdvl x8, #1
+; CHECK-NEXT: mov x8, #-1
; CHECK-NEXT: mov x9, sp
-; CHECK-NEXT: sub x8, x8, #1
; CHECK-NEXT: mov w10, #128
-; CHECK-NEXT: cmp x8, #128
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: csel x8, x8, x10, lo
+; CHECK-NEXT: addvl x8, x8, #1
; CHECK-NEXT: st1h { z1.h }, p0, [x9, #1, mul vl]
+; CHECK-NEXT: cmp x8, #128
; CHECK-NEXT: st1h { z0.h }, p0, [sp]
+; CHECK-NEXT: csel x8, x8, x10, lo
; CHECK-NEXT: ldrh w0, [x9, x8, lsl #1]
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -169,19 +169,19 @@ define i32 @split_extract_16i32(<vscale x 16 x i32> %a) {
; CHECK-NEXT: addvl sp, sp, #-4
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG
; CHECK-NEXT: .cfi_offset w29, -16
-; CHECK-NEXT: rdvl x9, #1
+; CHECK-NEXT: mov x8, #-1
; CHECK-NEXT: mov w10, #34464
; CHECK-NEXT: movk w10, #1, lsl #16
-; CHECK-NEXT: sub x9, x9, #1
-; CHECK-NEXT: mov x8, sp
-; CHECK-NEXT: cmp x9, x10
-; CHECK-NEXT: csel x9, x9, x10, lo
+; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: st1w { z3.s }, p0, [x8, #3, mul vl]
-; CHECK-NEXT: st1w { z2.s }, p0, [x8, #2, mul vl]
-; CHECK-NEXT: st1w { z1.s }, p0, [x8, #1, mul vl]
+; CHECK-NEXT: addvl x8, x8, #1
+; CHECK-NEXT: cmp x8, x10
+; CHECK-NEXT: st1w { z3.s }, p0, [x9, #3, mul vl]
+; CHECK-NEXT: csel x8, x8, x10, lo
+; CHECK-NEXT: st1w { z2.s }, p0, [x9, #2, mul vl]
+; CHECK-NEXT: st1w { z1.s }, p0, [x9, #1, mul vl]
; CHECK-NEXT: st1w { z0.s }, p0, [sp]
-; CHECK-NEXT: ldr w0, [x8, x9, lsl #2]
+; CHECK-NEXT: ldr w0, [x9, x8, lsl #2]
; CHECK-NEXT: addvl sp, sp, #4
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/sve-split-insert-elt.ll b/llvm/test/CodeGen/AArch64/sve-split-insert-elt.ll
index 8f4ba66f208ac..0b9baa23a11fc 100644
--- a/llvm/test/CodeGen/AArch64/sve-split-insert-elt.ll
+++ b/llvm/test/CodeGen/AArch64/sve-split-insert-elt.ll
@@ -23,14 +23,14 @@ define <vscale x 32 x i8> @split_insert_32i8_idx(<vscale x 32 x i8> %a, i8 %elt,
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
; CHECK-NEXT: .cfi_offset w29, -16
-; CHECK-NEXT: rdvl x8, #2
+; CHECK-NEXT: mov x8, #-1
; CHECK-NEXT: mov x9, sp
-; CHECK-NEXT: sub x8, x8, #1
; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: cmp x1, x8
-; CHECK-NEXT: csel x8, x1, x8, lo
+; CHECK-NEXT: addvl x8, x8, #2
; CHECK-NEXT: st1b { z1.b }, p0, [x9, #1, mul vl]
+; CHECK-NEXT: cmp x1, x8
; CHECK-NEXT: st1b { z0.b }, p0, [sp]
+; CHECK-NEXT: csel x8, x1, x8, lo
; CHECK-NEXT: strb w0, [x9, x8]
; CHECK-NEXT: ld1b { z1.b }, p0/z, [x9, #1, mul vl]
; CHECK-NEXT: ld1b { z0.b }, p0/z, [sp]
@@ -135,14 +135,14 @@ define <vscale x 32 x i16> @split_insert_32i16(<vscale x 32 x i16> %a, i16 %elt)
; CHECK-NEXT: addvl sp, sp, #-4
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG
; CHECK-NEXT: .cfi_offset w29, -16
-; CHECK-NEXT: rdvl x8, #2
+; CHECK-NEXT: mov x8, #-1
; CHECK-NEXT: mov w10, #128
-; CHECK-NEXT: sub x8, x8, #1
; CHECK-NEXT: mov x9, sp
-; CHECK-NEXT: cmp x8, #128
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: csel x8, x8, x10, lo
+; CHECK-NEXT: addvl x8, x8, #2
+; CHECK-NEXT: cmp x8, #128
; CHECK-NEXT: st1h { z3.h }, p0, [x9, #3, mul vl]
+; CHECK-NEXT: csel x8, x8, x10, lo
; CHECK-NEXT: st1h { z2.h }, p0, [x9, #2, mul vl]
; CHECK-NEXT: st1h { z1.h }, p0, [x9, #1, mul vl]
; CHECK-NEXT: st1h { z0.h }, p0, [sp]
diff --git a/llvm/test/CodeGen/AArch64/sve-stepvector.ll b/llvm/test/CodeGen/AArch64/sve-stepvector.ll
index 71e69e33d8c66..8dd8562670ccd 100644
--- a/llvm/test/CodeGen/AArch64/sve-stepvector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-stepvector.ll
@@ -48,10 +48,9 @@ entry:
define <vscale x 4 x i64> @stepvector_nxv4i64() {
; CHECK-LABEL: stepvector_nxv4i64:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: cntd x8
; CHECK-NEXT: index z0.d, #0, #1
-; CHECK-NEXT: mov z1.d, x8
-; CHECK-NEXT: add z1.d, z0.d, z1.d
+; CHECK-NEXT: mov z1.d, z0.d
+; CHECK-NEXT: incd z1.d
; CHECK-NEXT: ret
entry:
%0 = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
@@ -61,14 +60,13 @@ entry:
define <vscale x 16 x i32> @stepvector_nxv16i32() {
; CHECK-LABEL: stepvector_nxv16i32:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: cntw x9
-; CHECK-NEXT: cnth x8
; CHECK-NEXT: index z0.s, #0, #1
-; CHECK-NEXT: mov z1.s, w9
-; CHECK-NEXT: mov z3.s, w8
-; CHECK-NEXT: add z1.s, z0.s, z1.s
-; CHECK-NEXT: add z2.s, z0.s, z3.s
-; CHECK-NEXT: add z3.s, z1.s, z3.s
+; CHECK-NEXT: mov z1.d, z0.d
+; CHECK-NEXT: mov z2.d, z0.d
+; CHECK-NEXT: incw z1.s
+; CHECK-NEXT: incw z2.s, all, mul #2
+; CHECK-NEXT: mov z3.d, z1.d
+; CHECK-NEXT: incw z3.s, all, mul #2
; CHECK-NEXT: ret
entry:
%0 = call <vscale x 16 x i32> @llvm.experimental.stepvector.nxv16i32()
diff --git a/llvm/test/CodeGen/AArch64/sve-vl-arith.ll b/llvm/test/CodeGen/AArch64/sve-vl-arith.ll
new file mode 100644
index 0000000000000..6323850515322
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-vl-arith.ll
@@ -0,0 +1,425 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -verify-machineinstrs < %s | FileCheck %s -check-prefix=NO_SCALAR_INC
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+use-scalar-inc-vl -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 -verify-machineinstrs < %s | FileCheck %s
+
+define <vscale x 8 x i16> @inch_vec(<vscale x 8 x i16> %a) {
+; NO_SCALAR_INC-LABEL: inch_vec:
+; NO_SCALAR_INC: // %bb.0:
+; NO_SCALAR_INC-NEXT: inch z0.h
+; NO_SCALAR_INC-NEXT: ret
+;
+; CHECK-LABEL: inch_vec:
+; CHECK: // %bb.0:
+; CHECK-NEXT: inch z0.h
+; CHECK-NEXT: ret
+ %vscale = call i16 @llvm.vscale.i16()
+ %mul = mul i16 %vscale, 8
+ %vl = insertelement <vscale x 8 x i16> undef, i16 %mul, i32 0
+ %vl.splat = shufflevector <vscale x 8 x i16> %vl, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
+ %res = add <vscale x 8 x i16> %a, %vl.splat
+ ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 4 x i32> @incw_vec(<vscale x 4 x i32> %a) {
+; NO_SCALAR_INC-LABEL: incw_vec:
+; NO_SCALAR_INC: // %bb.0:
+; NO_SCALAR_INC-NEXT: incw z0.s
+; NO_SCALAR_INC-NEXT: ret
+;
+; CHECK-LABEL: incw_vec:
+; CHECK: // %bb.0:
+; CHECK-NEXT: incw z0.s
+; CHECK-NEXT: ret
+ %vscale = call i32 @llvm.vscale.i32()
+ %mul = mul i32 %vscale, 4
+ %vl = insertelement <vscale x 4 x i32> undef, i32 %mul, i32 0
+ %vl.splat = shufflevector <vscale x 4 x i32> %vl, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+ %res = add <vscale x 4 x i32> %a, %vl.splat
+ ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 2 x i64> @incd_vec(<vscale x 2 x i64> %a) {
+; NO_SCALAR_INC-LABEL: incd_vec:
+; NO_SCALAR_INC: // %bb.0:
+; NO_SCALAR_INC-NEXT: incd z0.d
+; NO_SCALAR_INC-NEXT: ret
+;
+; CHECK-LABEL: incd_vec:
+; CHECK: // %bb.0:
+; CHECK-NEXT: incd z0.d
+; CHECK-NEXT: ret
+ %vscale = call i64 @llvm.vscale.i64()
+ %mul = mul i64 %vscale, 2
+ %vl = insertelement <vscale x 2 x i64> undef, i64 %mul, i32 0
+ %vl.splat = shufflevector <vscale x 2 x i64> %vl, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+ %res = add <vscale x 2 x i64> %a, %vl.splat
+ ret <vscale x 2 x i64> %res
+}
+
+define <vscale x 8 x i16> @dech_vec(<vscale x 8 x i16> %a) {
+; NO_SCALAR_INC-LABEL: dech_vec:
+; NO_SCALAR_INC: // %bb.0:
+; NO_SCALAR_INC-NEXT: dech z0.h, all, mul #2
+; NO_SCALAR_INC-NEXT: ret
+;
+; CHECK-LABEL: dech_vec:
+; CHECK: // %bb.0:
+; CHECK-NEXT: dech z0.h, all, mul #2
+; CHECK-NEXT: ret
+ %vscale = call i16 @llvm.vscale.i16()
+ %mul = mul i16 %vscale, 16
+ %vl = insertelement <vscale x 8 x i16> undef, i16 %mul, i32 0
+ %vl.splat = shufflevector <vscale x 8 x i16> %vl, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
+ %res = sub <vscale x 8 x i16> %a, %vl.splat
+ ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 4 x i32> @decw_vec(<vscale x 4 x i32> %a) {
+; NO_SCALAR_INC-LABEL: decw_vec:
+; NO_SCALAR_INC: // %bb.0:
+; NO_SCALAR_INC-NEXT: decw z0.s, all, mul #4
+; NO_SCALAR_INC-NEXT: ret
+;
+; CHECK-LABEL: decw_vec:
+; CHECK: // %bb.0:
+; CHECK-NEXT: decw z0.s, all, mul #4
+; CHECK-NEXT: ret
+ %vscale = call i32 @llvm.vscale.i32()
+ %mul = mul i32 %vscale, 16
+ %vl = insertelement <vscale x 4 x i32> undef, i32 %mul, i32 0
+ %vl.splat = shufflevector <vscale x 4 x i32> %vl, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+ %res = sub <vscale x 4 x i32> %a, %vl.splat
+ ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 2 x i64> @decd_vec(<vscale x 2 x i64> %a) {
+; NO_SCALAR_INC-LABEL: decd_vec:
+; NO_SCALAR_INC: // %bb.0:
+; NO_SCALAR_INC-NEXT: decd z0.d, all, mul #8
+; NO_SCALAR_INC-NEXT: ret
+;
+; CHECK-LABEL: decd_vec:
+; CHECK: // %bb.0:
+; CHECK-NEXT: decd z0.d, all, mul #8
+; CHECK-NEXT: ret
+ %vscale = call i64 @llvm.vscale.i64()
+ %mul = mul i64 %vscale, 16
+ %vl = insertelement <vscale x 2 x i64> undef, i64 %mul, i32 0
+ %vl.splat = shufflevector <vscale x 2 x i64> %vl, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+ %res = sub <vscale x 2 x i64> %a, %vl.splat
+ ret <vscale x 2 x i64> %res
+}
+
+; NOTE: As there is no need for the predicate pattern we
+; fall back to using ADDVL with its larger immediate range.
+define i64 @incb_scalar_i64(i64 %a) {
+; NO_SCALAR_INC-LABEL: incb_scalar_i64:
+; NO_SCALAR_INC: // %bb.0:
+; NO_SCALAR_INC-NEXT: addvl x0, x0, #1
+; NO_SCALAR_INC-NEXT: ret
+;
+; CHECK-LABEL: incb_scalar_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: addvl x0, x0, #1
+; CHECK-NEXT: ret
+ %vscale = call i64 @llvm.vscale.i64()
+ %mul = mul i64 %vscale, 16
+ %add = add i64 %a, %mul
+ ret i64 %add
+}
+
+define i64 @inch_scalar_i64(i64 %a) {
+; NO_SCALAR_INC-LABEL: inch_scalar_i64:
+; NO_SCALAR_INC: // %bb.0:
+; NO_SCALAR_INC-NEXT: cnth x8
+; NO_SCALAR_INC-NEXT: add x0, x0, x8
+; NO_SCALAR_INC-NEXT: ret
+;
+; CHECK-LABEL: inch_scalar_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: inch x0
+; CHECK-NEXT: ret
+ %vscale = call i64 @llvm.vscale.i64()
+ %mul = mul i64 %vscale, 8
+ %add = add i64 %a, %mul
+ ret i64 %add
+}
+
+define i64 @incw_scalar_i64(i64 %a) {
+; NO_SCALAR_INC-LABEL: incw_scalar_i64:
+; NO_SCALAR_INC: // %bb.0:
+; NO_SCALAR_INC-NEXT: cntw x8
+; NO_SCALAR_INC-NEXT: add x0, x0, x8
+; NO_SCALAR_INC-NEXT: ret
+;
+; CHECK-LABEL: incw_scalar_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: incw x0
+; CHECK-NEXT: ret
+ %vscale = call i64 @llvm.vscale.i64()
+ %mul = mul i64 %vscale, 4
+ %add = add i64 %a, %mul
+ ret i64 %add
+}
+
+define i64 @incd_scalar_i64(i64 %a) {
+; NO_SCALAR_INC-LABEL: incd_scalar_i64:
+; NO_SCALAR_INC: // %bb.0:
+; NO_SCALAR_INC-NEXT: cntd x8
+; NO_SCALAR_INC-NEXT: add x0, x0, x8
+; NO_SCALAR_INC-NEXT: ret
+;
+; CHECK-LABEL: incd_scalar_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: incd x0
+; CHECK-NEXT: ret
+ %vscale = call i64 @llvm.vscale.i64()
+ %mul = mul i64 %vscale, 2
+ %add = add i64 %a, %mul
+ ret i64 %add
+}
+
+; NOTE: As there is no need for the predicate pattern we
+; fall back to using ADDVL with its larger immediate range.
+define i64 @decb_scalar_i64(i64 %a) {
+; NO_SCALAR_INC-LABEL: decb_scalar_i64:
+; NO_SCALAR_INC: // %bb.0:
+; NO_SCALAR_INC-NEXT: addvl x0, x0, #-2
+; NO_SCALAR_INC-NEXT: ret
+;
+; CHECK-LABEL: decb_scalar_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: addvl x0, x0, #-2
+; CHECK-NEXT: ret
+ %vscale = call i64 @llvm.vscale.i64()
+ %mul = mul i64 %vscale, 32
+ %sub = sub i64 %a, %mul
+ ret i64 %sub
+}
+
+define i64 @dech_scalar_i64(i64 %a) {
+; NO_SCALAR_INC-LABEL: dech_scalar_i64:
+; NO_SCALAR_INC: // %bb.0:
+; NO_SCALAR_INC-NEXT: cnth x8, all, mul #3
+; NO_SCALAR_INC-NEXT: neg x8, x8
+; NO_SCALAR_INC-NEXT: add x0, x0, x8
+; NO_SCALAR_INC-NEXT: ret
+;
+; CHECK-LABEL: dech_scalar_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: dech x0, all, mul #3
+; CHECK-NEXT: ret
+ %vscale = call i64 @llvm.vscale.i64()
+ %mul = mul i64 %vscale, 24
+ %sub = sub i64 %a, %mul
+ ret i64 %sub
+}
+
+define i64 @decw_scalar_i64(i64 %a) {
+; NO_SCALAR_INC-LABEL: decw_scalar_i64:
+; NO_SCALAR_INC: // %bb.0:
+; NO_SCALAR_INC-NEXT: cntw x8, all, mul #3
+; NO_SCALAR_INC-NEXT: neg x8, x8
+; NO_SCALAR_INC-NEXT: add x0, x0, x8
+; NO_SCALAR_INC-NEXT: ret
+;
+; CHECK-LABEL: decw_scalar_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: decw x0, all, mul #3
+; CHECK-NEXT: ret
+ %vscale = call i64 @llvm.vscale.i64()
+ %mul = mul i64 %vscale, 12
+ %sub = sub i64 %a, %mul
+ ret i64 %sub
+}
+
+define i64 @decd_scalar_i64(i64 %a) {
+; NO_SCALAR_INC-LABEL: decd_scalar_i64:
+; NO_SCALAR_INC: // %bb.0:
+; NO_SCALAR_INC-NEXT: cntd x8, all, mul #3
+; NO_SCALAR_INC-NEXT: neg x8, x8
+; NO_SCALAR_INC-NEXT: add x0, x0, x8
+; NO_SCALAR_INC-NEXT: ret
+;
+; CHECK-LABEL: decd_scalar_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: decd x0, all, mul #3
+; CHECK-NEXT: ret
+ %vscale = call i64 @llvm.vscale.i64()
+ %mul = mul i64 %vscale, 6
+ %sub = sub i64 %a, %mul
+ ret i64 %sub
+}
+
+; NOTE: As there is no need for the predicate pattern we
+; fall back to using ADDVL with its larger immediate range.
+define i32 @incb_scalar_i32(i32 %a) {
+; NO_SCALAR_INC-LABEL: incb_scalar_i32:
+; NO_SCALAR_INC: // %bb.0:
+; NO_SCALAR_INC-NEXT: // kill: def $w0 killed $w0 def $x0
+; NO_SCALAR_INC-NEXT: addvl x0, x0, #3
+; NO_SCALAR_INC-NEXT: // kill: def $w0 killed $w0 killed $x0
+; NO_SCALAR_INC-NEXT: ret
+
+; CHECK-LABEL: incb_scalar_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT: addvl x0, x0, #3
+; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT: ret
+ %vscale = call i64 @llvm.vscale.i64()
+ %mul = mul i64 %vscale, 48
+ %vl = trunc i64 %mul to i32
+ %add = add i32 %a, %vl
+ ret i32 %add
+}
+
+define i32 @inch_scalar_i32(i32 %a) {
+; NO_SCALAR_INC-LABEL: inch_scalar_i32:
+; NO_SCALAR_INC: // %bb.0:
+; NO_SCALAR_INC-NEXT: cnth x8, all, mul #7
+; NO_SCALAR_INC-NEXT: add w0, w0, w8
+; NO_SCALAR_INC-NEXT: ret
+
+; CHECK-LABEL: inch_scalar_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT: inch x0, all, mul #7
+; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT: ret
+ %vscale = call i64 @llvm.vscale.i64()
+ %mul = mul i64 %vscale, 56
+ %vl = trunc i64 %mul to i32
+ %add = add i32 %a, %vl
+ ret i32 %add
+}
+
+define i32 @incw_scalar_i32(i32 %a) {
+; NO_SCALAR_INC-LABEL: incw_scalar_i32:
+; NO_SCALAR_INC: // %bb.0:
+; NO_SCALAR_INC-NEXT: cntw x8, all, mul #7
+; NO_SCALAR_INC-NEXT: add w0, w0, w8
+; NO_SCALAR_INC-NEXT: ret
+
+; CHECK-LABEL: incw_scalar_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT: incw x0, all, mul #7
+; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT: ret
+ %vscale = call i64 @llvm.vscale.i64()
+ %mul = mul i64 %vscale, 28
+ %vl = trunc i64 %mul to i32
+ %add = add i32 %a, %vl
+ ret i32 %add
+}
+
+define i32 @incd_scalar_i32(i32 %a) {
+; NO_SCALAR_INC-LABEL: incd_scalar_i32:
+; NO_SCALAR_INC: // %bb.0:
+; NO_SCALAR_INC-NEXT: cntd x8, all, mul #7
+; NO_SCALAR_INC-NEXT: add w0, w0, w8
+; NO_SCALAR_INC-NEXT: ret
+
+; CHECK-LABEL: incd_scalar_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT: incd x0, all, mul #7
+; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT: ret
+ %vscale = call i64 @llvm.vscale.i64()
+ %mul = mul i64 %vscale, 14
+ %vl = trunc i64 %mul to i32
+ %add = add i32 %a, %vl
+ ret i32 %add
+}
+
+; NOTE: As there is no need for the predicate pattern we
+; fall back to using ADDVL with its larger immediate range.
+define i32 @decb_scalar_i32(i32 %a) {
+; NO_SCALAR_INC-LABEL: decb_scalar_i32:
+; NO_SCALAR_INC: // %bb.0:
+; NO_SCALAR_INC-NEXT: // kill: def $w0 killed $w0 def $x0
+; NO_SCALAR_INC-NEXT: addvl x0, x0, #-4
+; NO_SCALAR_INC-NEXT: // kill: def $w0 killed $w0 killed $x0
+; NO_SCALAR_INC-NEXT: ret
+
+; CHECK-LABEL: decb_scalar_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT: addvl x0, x0, #-4
+; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT: ret
+ %vscale = call i64 @llvm.vscale.i64()
+ %mul = mul i64 %vscale, 64
+ %vl = trunc i64 %mul to i32
+ %sub = sub i32 %a, %vl
+ ret i32 %sub
+}
+
+define i32 @dech_scalar_i32(i32 %a) {
+; NO_SCALAR_INC-LABEL: dech_scalar_i32:
+; NO_SCALAR_INC: // %bb.0:
+; NO_SCALAR_INC-NEXT: cnth x8
+; NO_SCALAR_INC-NEXT: neg x8, x8
+; NO_SCALAR_INC-NEXT: add w0, w0, w8
+; NO_SCALAR_INC-NEXT: ret
+
+; CHECK-LABEL: dech_scalar_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT: dech x0
+; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT: ret
+ %vscale = call i64 @llvm.vscale.i64()
+ %mul = mul i64 %vscale, 8
+ %vl = trunc i64 %mul to i32
+ %sub = sub i32 %a, %vl
+ ret i32 %sub
+}
+
+define i32 @decw_scalar_i32(i32 %a) {
+; NO_SCALAR_INC-LABEL: decw_scalar_i32:
+; NO_SCALAR_INC: // %bb.0:
+; NO_SCALAR_INC-NEXT: cntw x8
+; NO_SCALAR_INC-NEXT: neg x8, x8
+; NO_SCALAR_INC-NEXT: add w0, w0, w8
+; NO_SCALAR_INC-NEXT: ret
+
+; CHECK-LABEL: decw_scalar_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT: decw x0
+; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT: ret
+ %vscale = call i64 @llvm.vscale.i64()
+ %mul = mul i64 %vscale, 4
+ %vl = trunc i64 %mul to i32
+ %sub = sub i32 %a, %vl
+ ret i32 %sub
+}
+
+define i32 @decd_scalar_i32(i32 %a) {
+; NO_SCALAR_INC-LABEL: decd_scalar_i32:
+; NO_SCALAR_INC: // %bb.0:
+; NO_SCALAR_INC-NEXT: cntd x8
+; NO_SCALAR_INC-NEXT: neg x8, x8
+; NO_SCALAR_INC-NEXT: add w0, w0, w8
+; NO_SCALAR_INC-NEXT: ret
+;
+; CHECK-LABEL: decd_scalar_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT: decd x0
+; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT: ret
+ %vscale = call i64 @llvm.vscale.i64()
+ %mul = mul i64 %vscale, 2
+ %vl = trunc i64 %mul to i32
+ %sub = sub i32 %a, %vl
+ ret i32 %sub
+}
+
+declare i16 @llvm.vscale.i16()
+declare i32 @llvm.vscale.i32()
+declare i64 @llvm.vscale.i64()