[llvm] 1a2e901 - [SVE][CodeGen] Add patterns for ADD/SUB + element count

Kerry McLaughlin via llvm-commits <llvm-commits at lists.llvm.org>
Wed Oct 13 03:47:14 PDT 2021


Author: Kerry McLaughlin
Date: 2021-10-13T11:36:15+01:00
New Revision: 1a2e90199f83e7e3a7267b8fa9715d87ed5b2f88

URL: https://github.com/llvm/llvm-project/commit/1a2e90199f83e7e3a7267b8fa9715d87ed5b2f88
DIFF: https://github.com/llvm/llvm-project/commit/1a2e90199f83e7e3a7267b8fa9715d87ed5b2f88.diff

LOG: [SVE][CodeGen] Add patterns for ADD/SUB + element count

This patch adds patterns to match the following with INC/DEC:
 - @llvm.aarch64.sve.cnt[b|h|w|d] intrinsics + ADD/SUB
 - vscale + ADD/SUB

On some implementations of SVE, INC/DEC VL is not as cheap as ADD/SUB, so
this behaviour is guarded by the "use-scalar-inc-vl" feature flag, which is
off by default for SVE. There are no known issues with SVE2, so the feature
is enabled by default when targeting SVE2.
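
As a rough illustration (the function name below is made up for this note;
the added test sve-vl-arith.ll has the authoritative cases), IR of the
following shape, built with "+sve,+use-scalar-inc-vl", is now expected to
select "inch x0" rather than "cnth x8; add x0, x0, x8":

  declare i64 @llvm.vscale.i64()

  define i64 @inch_example(i64 %a) {
    %vscale = call i64 @llvm.vscale.i64() ; number of 128-bit SVE granules
    %mul = mul i64 %vscale, 8             ; vscale * 8 == CNTH (halfword count)
    %add = add i64 %a, %mul
    ret i64 %add
  }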

Reviewed By: david-arm

Differential Revision: https://reviews.llvm.org/D111441

Added: 
    llvm/test/CodeGen/AArch64/sve-vl-arith.ll

Modified: 
    llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
    llvm/lib/Target/AArch64/AArch64.td
    llvm/lib/Target/AArch64/AArch64InstrInfo.td
    llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
    llvm/lib/Target/AArch64/AArch64Subtarget.h
    llvm/lib/Target/AArch64/SVEInstrFormats.td
    llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll
    llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll
    llvm/test/CodeGen/AArch64/sve-gep.ll
    llvm/test/CodeGen/AArch64/sve-insert-element.ll
    llvm/test/CodeGen/AArch64/sve-insert-vector.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-counting-elems.ll
    llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll
    llvm/test/CodeGen/AArch64/sve-split-insert-elt.ll
    llvm/test/CodeGen/AArch64/sve-stepvector.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 34286f3388251..0a5dce34cd3ab 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5034,6 +5034,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
     }
     if (OpOpcode == ISD::UNDEF)
       return getUNDEF(VT);
+    if (OpOpcode == ISD::VSCALE && !NewNodesMustHaveLegalTypes)
+      return getVScale(DL, VT, Operand.getConstantOperandAPInt(0));
     break;
   case ISD::ANY_EXTEND_VECTOR_INREG:
   case ISD::ZERO_EXTEND_VECTOR_INREG:

diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td
index 13cbda48d328a..4ea1bf2889ba6 100644
--- a/llvm/lib/Target/AArch64/AArch64.td
+++ b/llvm/lib/Target/AArch64/AArch64.td
@@ -129,8 +129,12 @@ def FeatureExperimentalZeroingPseudos
                        "merged with destructive operations",
                        []>;
 
+def FeatureUseScalarIncVL : SubtargetFeature<"use-scalar-inc-vl",
+  "UseScalarIncVL", "true", "Prefer inc/dec over add+cnt">;
+
 def FeatureSVE2 : SubtargetFeature<"sve2", "HasSVE2", "true",
-  "Enable Scalable Vector Extension 2 (SVE2) instructions", [FeatureSVE]>;
+  "Enable Scalable Vector Extension 2 (SVE2) instructions",
+  [FeatureSVE, FeatureUseScalarIncVL]>;
 
 def FeatureSVE2AES : SubtargetFeature<"sve2-aes", "HasSVE2AES", "true",
   "Enable AES SVE2 instructions", [FeatureSVE2, FeatureAES]>;

diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index db3728abb97e8..3f20653ba49c4 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -201,6 +201,8 @@ def UseNegativeImmediates
     : Predicate<"false">, AssemblerPredicate<(all_of (not FeatureNoNegativeImmediates)),
                                              "NegativeImmediates">;
 
+def UseScalarIncVL : Predicate<"Subtarget->useScalarIncVL()">;
+
 def AArch64LocalRecover : SDNode<"ISD::LOCAL_RECOVER",
                                   SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
                                                        SDTCisInt<1>]>>;

diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 57c97e03dcc5d..7bd891a2acdc5 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1452,16 +1452,18 @@ let Predicates = [HasSVEorStreamingSVE] in {
   defm CNTW_XPiI : sve_int_count<0b100, "cntw", int_aarch64_sve_cntw>;
   defm CNTD_XPiI : sve_int_count<0b110, "cntd", int_aarch64_sve_cntd>;
   defm CNTP_XPP : sve_int_pcount_pred<0b0000, "cntp", int_aarch64_sve_cntp>;
+}
 
-  defm INCB_XPiI : sve_int_pred_pattern_a<0b000, "incb">;
-  defm DECB_XPiI : sve_int_pred_pattern_a<0b001, "decb">;
-  defm INCH_XPiI : sve_int_pred_pattern_a<0b010, "inch">;
-  defm DECH_XPiI : sve_int_pred_pattern_a<0b011, "dech">;
-  defm INCW_XPiI : sve_int_pred_pattern_a<0b100, "incw">;
-  defm DECW_XPiI : sve_int_pred_pattern_a<0b101, "decw">;
-  defm INCD_XPiI : sve_int_pred_pattern_a<0b110, "incd">;
-  defm DECD_XPiI : sve_int_pred_pattern_a<0b111, "decd">;
+  defm INCB_XPiI : sve_int_pred_pattern_a<0b000, "incb", add, int_aarch64_sve_cntb>;
+  defm DECB_XPiI : sve_int_pred_pattern_a<0b001, "decb", sub, int_aarch64_sve_cntb>;
+  defm INCH_XPiI : sve_int_pred_pattern_a<0b010, "inch", add, int_aarch64_sve_cnth>;
+  defm DECH_XPiI : sve_int_pred_pattern_a<0b011, "dech", sub, int_aarch64_sve_cnth>;
+  defm INCW_XPiI : sve_int_pred_pattern_a<0b100, "incw", add, int_aarch64_sve_cntw>;
+  defm DECW_XPiI : sve_int_pred_pattern_a<0b101, "decw", sub, int_aarch64_sve_cntw>;
+  defm INCD_XPiI : sve_int_pred_pattern_a<0b110, "incd", add, int_aarch64_sve_cntd>;
+  defm DECD_XPiI : sve_int_pred_pattern_a<0b111, "decd", sub, int_aarch64_sve_cntd>;
 
+let Predicates = [HasSVEorStreamingSVE] in {
   defm SQINCB_XPiWdI : sve_int_pred_pattern_b_s32<0b00000, "sqincb", int_aarch64_sve_sqincb_n32>;
   defm UQINCB_WPiI   : sve_int_pred_pattern_b_u32<0b00001, "uqincb", int_aarch64_sve_uqincb_n32>;
   defm SQDECB_XPiWdI : sve_int_pred_pattern_b_s32<0b00010, "sqdecb", int_aarch64_sve_sqdecb_n32>;
@@ -1893,6 +1895,72 @@ let Predicates = [HasSVEorStreamingSVE] in {
     def : Pat<(vscale (sve_cntd_imm_neg i32:$imm)), (SUBXrs XZR, (CNTD_XPiI 31, $imm), 0)>;
   }
 
+  let AddedComplexity = 5 in {
+    def : Pat<(add GPR64:$op, (vscale (sve_rdvl_imm i32:$imm))),
+              (ADDVL_XXI GPR64:$op, $imm)>;
+
+    def : Pat<(add GPR32:$op, (i32 (trunc (vscale (sve_rdvl_imm i32:$imm))))),
+              (i32 (EXTRACT_SUBREG (ADDVL_XXI (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+                                             GPR32:$op, sub_32), $imm),
+                                   sub_32))>;
+
+    def : Pat<(nxv8i16 (add ZPR:$op, (nxv8i16 (AArch64dup (i32 (trunc (vscale (sve_cnth_imm i32:$imm)))))))),
+              (INCH_ZPiI ZPR:$op, 31, $imm)>;
+    def : Pat<(nxv4i32 (add ZPR:$op, (nxv4i32 (AArch64dup (i32 (trunc (vscale (sve_cntw_imm i32:$imm)))))))),
+              (INCW_ZPiI ZPR:$op, 31, $imm)>;
+    def : Pat<(nxv2i64 (add ZPR:$op, (nxv2i64 (AArch64dup (i64 (vscale (sve_cntd_imm i32:$imm))))))),
+              (INCD_ZPiI ZPR:$op, 31, $imm)>;
+
+    def : Pat<(nxv8i16 (sub ZPR:$op, (nxv8i16 (AArch64dup (i32 (trunc (vscale (sve_cnth_imm i32:$imm)))))))),
+              (DECH_ZPiI ZPR:$op, 31, $imm)>;
+    def : Pat<(nxv4i32 (sub ZPR:$op, (nxv4i32 (AArch64dup (i32 (trunc (vscale (sve_cntw_imm i32:$imm)))))))),
+              (DECW_ZPiI ZPR:$op, 31, $imm)>;
+    def : Pat<(nxv2i64 (sub ZPR:$op, (nxv2i64 (AArch64dup (i64 (vscale (sve_cntd_imm i32:$imm))))))),
+              (DECD_ZPiI ZPR:$op, 31, $imm)>;
+  }
+
+  let Predicates = [HasSVE, UseScalarIncVL], AddedComplexity = 5 in {
+    def : Pat<(add GPR64:$op, (vscale (sve_cnth_imm i32:$imm))),
+              (INCH_XPiI GPR64:$op, 31, $imm)>;
+    def : Pat<(add GPR64:$op, (vscale (sve_cntw_imm i32:$imm))),
+              (INCW_XPiI GPR64:$op, 31, $imm)>;
+    def : Pat<(add GPR64:$op, (vscale (sve_cntd_imm i32:$imm))),
+              (INCD_XPiI GPR64:$op, 31, $imm)>;
+
+    def : Pat<(add GPR64:$op, (vscale (sve_cnth_imm_neg i32:$imm))),
+              (DECH_XPiI GPR64:$op, 31, $imm)>;
+    def : Pat<(add GPR64:$op, (vscale (sve_cntw_imm_neg i32:$imm))),
+              (DECW_XPiI GPR64:$op, 31, $imm)>;
+    def : Pat<(add GPR64:$op, (vscale (sve_cntd_imm_neg i32:$imm))),
+              (DECD_XPiI GPR64:$op, 31, $imm)>;
+
+    def : Pat<(add GPR32:$op, (i32 (trunc (vscale (sve_cnth_imm i32:$imm))))),
+              (i32 (EXTRACT_SUBREG (INCH_XPiI (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+                                               GPR32:$op, sub_32), 31, $imm),
+                                    sub_32))>;
+    def : Pat<(add GPR32:$op, (i32 (trunc (vscale (sve_cntw_imm i32:$imm))))),
+              (i32 (EXTRACT_SUBREG (INCW_XPiI (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+                                               GPR32:$op, sub_32), 31, $imm),
+                                    sub_32))>;
+    def : Pat<(add GPR32:$op, (i32 (trunc (vscale (sve_cntd_imm i32:$imm))))),
+              (i32 (EXTRACT_SUBREG (INCD_XPiI (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+                                               GPR32:$op, sub_32), 31, $imm),
+                                    sub_32))>;
+
+    def : Pat<(add GPR32:$op, (i32 (trunc (vscale (sve_cnth_imm_neg i32:$imm))))),
+              (i32 (EXTRACT_SUBREG (DECH_XPiI (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+                                               GPR32:$op, sub_32), 31, $imm),
+                                    sub_32))>;
+    def : Pat<(add GPR32:$op, (i32 (trunc (vscale (sve_cntw_imm_neg i32:$imm))))),
+              (i32 (EXTRACT_SUBREG (DECW_XPiI (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+                                               GPR32:$op, sub_32), 31, $imm),
+                                    sub_32))>;
+    def : Pat<(add GPR32:$op, (i32 (trunc (vscale (sve_cntd_imm_neg i32:$imm))))),
+              (i32 (EXTRACT_SUBREG (DECD_XPiI (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+                                               GPR32:$op, sub_32), 31, $imm),
+                                    sub_32))>;
+  }
+
   def : Pat<(add GPR64:$op, (vscale (sve_rdvl_imm i32:$imm))),
             (ADDVL_XXI GPR64:$op, $imm)>;
 

diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
index dae0ebd1041c7..cfb0f689d05ee 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -123,6 +123,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
   // SVE extensions
   bool HasSVE = false;
   bool UseExperimentalZeroingPseudos = false;
+  bool UseScalarIncVL = false;
 
   // Armv8.2 Crypto extensions
   bool HasSM4 = false;
@@ -457,6 +458,8 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
     return UseExperimentalZeroingPseudos;
   }
 
+  bool useScalarIncVL() const { return UseScalarIncVL; }
+
   /// CPU has TBI (top byte of addresses is ignored during HW address
   /// translation) and OS enables it.
   bool supportsAddressTopByteIgnored() const;

diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index f1d36d589daab..490e08a89471c 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -920,13 +920,43 @@ class sve_int_pred_pattern_a<bits<3> opc, string asm>
   let Constraints = "$Rdn = $_Rdn";
 }
 
-multiclass sve_int_pred_pattern_a<bits<3> opc, string asm> {
-  def NAME : sve_int_pred_pattern_a<opc, asm>;
+multiclass sve_int_pred_pattern_a<bits<3> opc, string asm,
+                                  SDPatternOperator op,
+                                  SDPatternOperator opcnt> {
+  let Predicates = [HasSVEorStreamingSVE] in {
+    def NAME : sve_int_pred_pattern_a<opc, asm>;
+
+    def : InstAlias<asm # "\t$Rdn, $pattern",
+                    (!cast<Instruction>(NAME) GPR64:$Rdn, sve_pred_enum:$pattern, 1), 1>;
+    def : InstAlias<asm # "\t$Rdn",
+                    (!cast<Instruction>(NAME) GPR64:$Rdn, 0b11111, 1), 2>;
+  }
 
-  def : InstAlias<asm # "\t$Rdn, $pattern",
-                  (!cast<Instruction>(NAME) GPR64:$Rdn, sve_pred_enum:$pattern, 1), 1>;
-  def : InstAlias<asm # "\t$Rdn",
-                  (!cast<Instruction>(NAME) GPR64:$Rdn, 0b11111, 1), 2>;
+  let Predicates = [HasSVEorStreamingSVE, UseScalarIncVL] in {
+    def : Pat<(i64 (op GPR64:$Rdn, (opcnt sve_pred_enum:$pattern))),
+              (!cast<Instruction>(NAME) GPR64:$Rdn, sve_pred_enum:$pattern, 1)>;
+
+    def : Pat<(i64 (op GPR64:$Rdn, (mul (opcnt sve_pred_enum:$pattern), (sve_cnt_mul_imm i32:$imm)))),
+              (!cast<Instruction>(NAME) GPR64:$Rdn, sve_pred_enum:$pattern, $imm)>;
+
+    def : Pat<(i64 (op GPR64:$Rdn, (shl (opcnt sve_pred_enum:$pattern), (i64 (sve_cnt_shl_imm i32:$imm))))),
+              (!cast<Instruction>(NAME) GPR64:$Rdn, sve_pred_enum:$pattern, $imm)>;
+
+    def : Pat<(i32 (op GPR32:$Rdn, (i32 (trunc (opcnt (sve_pred_enum:$pattern)))))),
+              (i32 (EXTRACT_SUBREG (!cast<Instruction>(NAME) (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+                                               GPR32:$Rdn, sub_32), sve_pred_enum:$pattern, 1),
+                                    sub_32))>;
+
+    def : Pat<(i32 (op GPR32:$Rdn, (mul (i32 (trunc (opcnt (sve_pred_enum:$pattern)))), (sve_cnt_mul_imm i32:$imm)))),
+              (i32 (EXTRACT_SUBREG (!cast<Instruction>(NAME) (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+                                               GPR32:$Rdn, sub_32), sve_pred_enum:$pattern, $imm),
+                                    sub_32))>;
+
+    def : Pat<(i32 (op GPR32:$Rdn, (shl (i32 (trunc (opcnt (sve_pred_enum:$pattern)))), (i64 (sve_cnt_shl_imm i32:$imm))))),
+              (i32 (EXTRACT_SUBREG (!cast<Instruction>(NAME) (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+                                               GPR32:$Rdn, sub_32), sve_pred_enum:$pattern, $imm),
+                                    sub_32))>;
+  }
 }
 
 class sve_int_pred_pattern_b<bits<5> opc, string asm, RegisterOperand dt,

diff --git a/llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll b/llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll
index b8540d51eb044..0077ea3b7ff27 100644
--- a/llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll
+++ b/llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll
@@ -39,15 +39,15 @@ define <vscale x 16 x i8> @splice_nxv16i8_clamped_idx(<vscale x 16 x i8> %a, <vs
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-2
-; CHECK-NEXT:    rdvl x8, #1
+; CHECK-NEXT:    mov x8, #-1
 ; CHECK-NEXT:    mov x9, sp
-; CHECK-NEXT:    sub x8, x8, #1
 ; CHECK-NEXT:    mov w10, #256
-; CHECK-NEXT:    cmp x8, #256
 ; CHECK-NEXT:    ptrue p0.b
-; CHECK-NEXT:    csel x8, x8, x10, lo
 ; CHECK-NEXT:    st1b { z0.b }, p0, [sp]
+; CHECK-NEXT:    addvl x8, x8, #1
 ; CHECK-NEXT:    st1b { z1.b }, p0, [x9, #1, mul vl]
+; CHECK-NEXT:    cmp x8, #256
+; CHECK-NEXT:    csel x8, x8, x10, lo
 ; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x9, x8]
 ; CHECK-NEXT:    addvl sp, sp, #2
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -622,23 +622,23 @@ define <vscale x 16 x float> @splice_nxv16f32_clamped_idx(<vscale x 16 x float>
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-8
-; CHECK-NEXT:    rdvl x9, #1
+; CHECK-NEXT:    mov x8, #-1
 ; CHECK-NEXT:    mov w10, #16
-; CHECK-NEXT:    sub x9, x9, #1
-; CHECK-NEXT:    mov x8, sp
-; CHECK-NEXT:    cmp x9, #16
+; CHECK-NEXT:    mov x9, sp
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    csel x9, x9, x10, lo
-; CHECK-NEXT:    st1w { z3.s }, p0, [x8, #3, mul vl]
-; CHECK-NEXT:    st1w { z2.s }, p0, [x8, #2, mul vl]
-; CHECK-NEXT:    add x10, x8, x9, lsl #2
-; CHECK-NEXT:    st1w { z1.s }, p0, [x8, #1, mul vl]
+; CHECK-NEXT:    addvl x8, x8, #1
+; CHECK-NEXT:    cmp x8, #16
+; CHECK-NEXT:    st1w { z3.s }, p0, [x9, #3, mul vl]
+; CHECK-NEXT:    csel x8, x8, x10, lo
+; CHECK-NEXT:    st1w { z2.s }, p0, [x9, #2, mul vl]
+; CHECK-NEXT:    st1w { z1.s }, p0, [x9, #1, mul vl]
 ; CHECK-NEXT:    st1w { z0.s }, p0, [sp]
-; CHECK-NEXT:    st1w { z7.s }, p0, [x8, #7, mul vl]
-; CHECK-NEXT:    st1w { z4.s }, p0, [x8, #4, mul vl]
-; CHECK-NEXT:    st1w { z5.s }, p0, [x8, #5, mul vl]
-; CHECK-NEXT:    st1w { z6.s }, p0, [x8, #6, mul vl]
-; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x8, x9, lsl #2]
+; CHECK-NEXT:    add x10, x9, x8, lsl #2
+; CHECK-NEXT:    st1w { z7.s }, p0, [x9, #7, mul vl]
+; CHECK-NEXT:    st1w { z4.s }, p0, [x9, #4, mul vl]
+; CHECK-NEXT:    st1w { z5.s }, p0, [x9, #5, mul vl]
+; CHECK-NEXT:    st1w { z6.s }, p0, [x9, #6, mul vl]
+; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x9, x8, lsl #2]
 ; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x10, #1, mul vl]
 ; CHECK-NEXT:    ld1w { z2.s }, p0/z, [x10, #2, mul vl]
 ; CHECK-NEXT:    ld1w { z3.s }, p0/z, [x10, #3, mul vl]

diff --git a/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll b/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll
index ff8be096cc410..066c7f8002c1e 100644
--- a/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll
@@ -229,12 +229,12 @@ define <16 x i8> @extract_v16i8_nxv16i8_idx16(<vscale x 16 x i8> %vec) nounwind
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    rdvl x8, #1
+; CHECK-NEXT:    mov x8, #-16
 ; CHECK-NEXT:    mov w9, #16
-; CHECK-NEXT:    sub x8, x8, #16
 ; CHECK-NEXT:    ptrue p0.b
-; CHECK-NEXT:    cmp x8, #16
 ; CHECK-NEXT:    st1b { z0.b }, p0, [sp]
+; CHECK-NEXT:    addvl x8, x8, #1
+; CHECK-NEXT:    cmp x8, #16
 ; CHECK-NEXT:    csel x8, x8, x9, lo
 ; CHECK-NEXT:    mov x9, sp
 ; CHECK-NEXT:    ldr q0, [x9, x8]

diff --git a/llvm/test/CodeGen/AArch64/sve-gep.ll b/llvm/test/CodeGen/AArch64/sve-gep.ll
index a1b64741d5a51..d2445aff408be 100644
--- a/llvm/test/CodeGen/AArch64/sve-gep.ll
+++ b/llvm/test/CodeGen/AArch64/sve-gep.ll
@@ -202,10 +202,8 @@ define <vscale x 2 x i64*> @scalable_of_fixed_5_i64(i64* %base, <vscale x 2 x i3
 define <vscale x 2 x <vscale x 2 x i64>*> @scalable_of_scalable_1(<vscale x 2 x i64>* %base) {
 ; CHECK-LABEL: scalable_of_scalable_1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    rdvl x8, #1
-; CHECK-NEXT:    mov z1.d, x0
-; CHECK-NEXT:    mov z0.d, x8
-; CHECK-NEXT:    add z0.d, z1.d, z0.d
+; CHECK-NEXT:    mov z0.d, x0
+; CHECK-NEXT:    incd z0.d, all, mul #8
 ; CHECK-NEXT:    ret
   %idx = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 1, i32 0), <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i32> zeroinitializer
   %d = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %base, <vscale x 2 x i64> %idx
@@ -215,9 +213,7 @@ define <vscale x 2 x <vscale x 2 x i64>*> @scalable_of_scalable_1(<vscale x 2 x
 define <vscale x 2 x <vscale x 2 x i64>*> @scalable_of_scalable_2(<vscale x 2 x <vscale x 2 x i64>*> %base) {
 ; CHECK-LABEL: scalable_of_scalable_2:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    rdvl x8, #1
-; CHECK-NEXT:    mov z1.d, x8
-; CHECK-NEXT:    add z0.d, z0.d, z1.d
+; CHECK-NEXT:    incd z0.d, all, mul #8
 ; CHECK-NEXT:    ret
   %idx = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 1, i32 0), <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i32> zeroinitializer
   %d = getelementptr <vscale x 2 x i64>, <vscale x 2 x <vscale x 2 x i64>*> %base, <vscale x 2 x i64> %idx

diff --git a/llvm/test/CodeGen/AArch64/sve-insert-element.ll b/llvm/test/CodeGen/AArch64/sve-insert-element.ll
index aa543b1b46855..95ecf2582f762 100644
--- a/llvm/test/CodeGen/AArch64/sve-insert-element.ll
+++ b/llvm/test/CodeGen/AArch64/sve-insert-element.ll
@@ -503,16 +503,16 @@ define <vscale x 32 x i1> @test_predicate_insert_32xi1(<vscale x 32 x i1> %val,
 ; CHECK-NEXT:    addvl sp, sp, #-2
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
 ; CHECK-NEXT:    .cfi_offset w29, -16
-; CHECK-NEXT:    rdvl x8, #2
+; CHECK-NEXT:    mov x8, #-1
 ; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    sxtw x9, w1
-; CHECK-NEXT:    sub x8, x8, #1
 ; CHECK-NEXT:    mov x10, sp
-; CHECK-NEXT:    cmp x9, x8
 ; CHECK-NEXT:    mov z0.b, p1/z, #1 // =0x1
-; CHECK-NEXT:    csel x8, x9, x8, lo
 ; CHECK-NEXT:    ptrue p1.b
+; CHECK-NEXT:    addvl x8, x8, #2
+; CHECK-NEXT:    cmp x9, x8
 ; CHECK-NEXT:    st1b { z0.b }, p1, [x10, #1, mul vl]
+; CHECK-NEXT:    csel x8, x9, x8, lo
 ; CHECK-NEXT:    mov z0.b, p0/z, #1 // =0x1
 ; CHECK-NEXT:    st1b { z0.b }, p1, [sp]
 ; CHECK-NEXT:    strb w0, [x10, x8]

diff --git a/llvm/test/CodeGen/AArch64/sve-insert-vector.ll b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll
index 9839ee51b77d2..49b3e85328552 100644
--- a/llvm/test/CodeGen/AArch64/sve-insert-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll
@@ -139,12 +139,12 @@ define <vscale x 16 x i8> @insert_v16i8_nxv16i8_idx16(<vscale x 16 x i8> %vec, <
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    rdvl x8, #1
+; CHECK-NEXT:    mov x8, #-16
 ; CHECK-NEXT:    mov w9, #16
-; CHECK-NEXT:    sub x8, x8, #16
 ; CHECK-NEXT:    ptrue p0.b
-; CHECK-NEXT:    cmp x8, #16
 ; CHECK-NEXT:    st1b { z0.b }, p0, [sp]
+; CHECK-NEXT:    addvl x8, x8, #1
+; CHECK-NEXT:    cmp x8, #16
 ; CHECK-NEXT:    csel x8, x8, x9, lo
 ; CHECK-NEXT:    mov x9, sp
 ; CHECK-NEXT:    str q1, [x9, x8]

diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-counting-elems.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-counting-elems.ll
index b37e3d8b8c82e..85ecb33151141 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-counting-elems.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-counting-elems.ll
@@ -1,4 +1,6 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+use-scalar-inc-vl < %s | FileCheck %s -check-prefix=USE_SCALAR_INC
 
 ;
 ; CNTB
@@ -6,16 +8,28 @@
 
 define i64 @cntb() {
 ; CHECK-LABEL: cntb:
-; CHECK: cntb x0, vl2
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cntb x0, vl2
+; CHECK-NEXT:    ret
+;
+; USE_SCALAR_INC-LABEL: cntb:
+; USE_SCALAR_INC:       // %bb.0:
+; USE_SCALAR_INC-NEXT:    cntb x0, vl2
+; USE_SCALAR_INC-NEXT:    ret
   %out = call i64 @llvm.aarch64.sve.cntb(i32 2)
   ret i64 %out
 }
 
 define i64 @cntb_mul3() {
 ; CHECK-LABEL: cntb_mul3:
-; CHECK: cntb x0, vl6, mul #3
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cntb x0, vl6, mul #3
+; CHECK-NEXT:    ret
+;
+; USE_SCALAR_INC-LABEL: cntb_mul3:
+; USE_SCALAR_INC:       // %bb.0:
+; USE_SCALAR_INC-NEXT:    cntb x0, vl6, mul #3
+; USE_SCALAR_INC-NEXT:    ret
   %cnt = call i64 @llvm.aarch64.sve.cntb(i32 6)
   %out = mul i64 %cnt, 3
   ret i64 %out
@@ -23,8 +37,14 @@ define i64 @cntb_mul3() {
 
 define i64 @cntb_mul4() {
 ; CHECK-LABEL: cntb_mul4:
-; CHECK: cntb x0, vl8, mul #4
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cntb x0, vl8, mul #4
+; CHECK-NEXT:    ret
+;
+; USE_SCALAR_INC-LABEL: cntb_mul4:
+; USE_SCALAR_INC:       // %bb.0:
+; USE_SCALAR_INC-NEXT:    cntb x0, vl8, mul #4
+; USE_SCALAR_INC-NEXT:    ret
   %cnt = call i64 @llvm.aarch64.sve.cntb(i32 8)
   %out = mul i64 %cnt, 4
   ret i64 %out
@@ -36,16 +56,28 @@ define i64 @cntb_mul4() {
 
 define i64 @cnth() {
 ; CHECK-LABEL: cnth:
-; CHECK: cnth x0, vl3
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cnth x0, vl3
+; CHECK-NEXT:    ret
+;
+; USE_SCALAR_INC-LABEL: cnth:
+; USE_SCALAR_INC:       // %bb.0:
+; USE_SCALAR_INC-NEXT:    cnth x0, vl3
+; USE_SCALAR_INC-NEXT:    ret
   %out = call i64 @llvm.aarch64.sve.cnth(i32 3)
   ret i64 %out
 }
 
 define i64 @cnth_mul5() {
 ; CHECK-LABEL: cnth_mul5:
-; CHECK: cnth x0, vl7, mul #5
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cnth x0, vl7, mul #5
+; CHECK-NEXT:    ret
+;
+; USE_SCALAR_INC-LABEL: cnth_mul5:
+; USE_SCALAR_INC:       // %bb.0:
+; USE_SCALAR_INC-NEXT:    cnth x0, vl7, mul #5
+; USE_SCALAR_INC-NEXT:    ret
   %cnt = call i64 @llvm.aarch64.sve.cnth(i32 7)
   %out = mul i64 %cnt, 5
   ret i64 %out
@@ -53,8 +85,14 @@ define i64 @cnth_mul5() {
 
 define i64 @cnth_mul8() {
 ; CHECK-LABEL: cnth_mul8:
-; CHECK: cnth x0, vl5, mul #8
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cnth x0, vl5, mul #8
+; CHECK-NEXT:    ret
+;
+; USE_SCALAR_INC-LABEL: cnth_mul8:
+; USE_SCALAR_INC:       // %bb.0:
+; USE_SCALAR_INC-NEXT:    cnth x0, vl5, mul #8
+; USE_SCALAR_INC-NEXT:    ret
   %cnt = call i64 @llvm.aarch64.sve.cnth(i32 5)
   %out = mul i64 %cnt, 8
   ret i64 %out
@@ -66,16 +104,28 @@ define i64 @cnth_mul8() {
 
 define i64 @cntw() {
 ; CHECK-LABEL: cntw:
-; CHECK: cntw x0, vl4
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cntw x0, vl4
+; CHECK-NEXT:    ret
+;
+; USE_SCALAR_INC-LABEL: cntw:
+; USE_SCALAR_INC:       // %bb.0:
+; USE_SCALAR_INC-NEXT:    cntw x0, vl4
+; USE_SCALAR_INC-NEXT:    ret
   %out = call i64 @llvm.aarch64.sve.cntw(i32 4)
   ret i64 %out
 }
 
 define i64 @cntw_mul11() {
 ; CHECK-LABEL: cntw_mul11:
-; CHECK: cntw x0, vl8, mul #11
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cntw x0, vl8, mul #11
+; CHECK-NEXT:    ret
+;
+; USE_SCALAR_INC-LABEL: cntw_mul11:
+; USE_SCALAR_INC:       // %bb.0:
+; USE_SCALAR_INC-NEXT:    cntw x0, vl8, mul #11
+; USE_SCALAR_INC-NEXT:    ret
   %cnt = call i64 @llvm.aarch64.sve.cntw(i32 8)
   %out = mul i64 %cnt, 11
   ret i64 %out
@@ -83,8 +133,14 @@ define i64 @cntw_mul11() {
 
 define i64 @cntw_mul2() {
 ; CHECK-LABEL: cntw_mul2:
-; CHECK: cntw x0, vl6, mul #2
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cntw x0, vl6, mul #2
+; CHECK-NEXT:    ret
+;
+; USE_SCALAR_INC-LABEL: cntw_mul2:
+; USE_SCALAR_INC:       // %bb.0:
+; USE_SCALAR_INC-NEXT:    cntw x0, vl6, mul #2
+; USE_SCALAR_INC-NEXT:    ret
   %cnt = call i64 @llvm.aarch64.sve.cntw(i32 6)
   %out = mul i64 %cnt, 2
   ret i64 %out
@@ -96,16 +152,28 @@ define i64 @cntw_mul2() {
 
 define i64 @cntd() {
 ; CHECK-LABEL: cntd:
-; CHECK: cntd x0, vl5
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cntd x0, vl5
+; CHECK-NEXT:    ret
+;
+; USE_SCALAR_INC-LABEL: cntd:
+; USE_SCALAR_INC:       // %bb.0:
+; USE_SCALAR_INC-NEXT:    cntd x0, vl5
+; USE_SCALAR_INC-NEXT:    ret
   %out = call i64 @llvm.aarch64.sve.cntd(i32 5)
   ret i64 %out
 }
 
 define i64 @cntd_mul15() {
 ; CHECK-LABEL: cntd_mul15:
-; CHECK: cntd x0, vl16, mul #15
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cntd x0, vl16, mul #15
+; CHECK-NEXT:    ret
+;
+; USE_SCALAR_INC-LABEL: cntd_mul15:
+; USE_SCALAR_INC:       // %bb.0:
+; USE_SCALAR_INC-NEXT:    cntd x0, vl16, mul #15
+; USE_SCALAR_INC-NEXT:    ret
   %cnt = call i64 @llvm.aarch64.sve.cntd(i32 9)
   %out = mul i64 %cnt, 15
   ret i64 %out
@@ -113,8 +181,14 @@ define i64 @cntd_mul15() {
 
 define i64 @cntd_mul16() {
 ; CHECK-LABEL: cntd_mul16:
-; CHECK: cntd x0, vl32, mul #16
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cntd x0, vl32, mul #16
+; CHECK-NEXT:    ret
+;
+; USE_SCALAR_INC-LABEL: cntd_mul16:
+; USE_SCALAR_INC:       // %bb.0:
+; USE_SCALAR_INC-NEXT:    cntd x0, vl32, mul #16
+; USE_SCALAR_INC-NEXT:    ret
   %cnt = call i64 @llvm.aarch64.sve.cntd(i32 10)
   %out = mul i64 %cnt, 16
   ret i64 %out
@@ -126,8 +200,14 @@ define i64 @cntd_mul16() {
 
 define i64 @cntp_b8(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a) {
 ; CHECK-LABEL: cntp_b8:
-; CHECK: cntp x0, p0, p1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cntp x0, p0, p1.b
+; CHECK-NEXT:    ret
+;
+; USE_SCALAR_INC-LABEL: cntp_b8:
+; USE_SCALAR_INC:       // %bb.0:
+; USE_SCALAR_INC-NEXT:    cntp x0, p0, p1.b
+; USE_SCALAR_INC-NEXT:    ret
   %out = call i64 @llvm.aarch64.sve.cntp.nxv16i1(<vscale x 16 x i1> %pg,
                                                  <vscale x 16 x i1> %a)
   ret i64 %out
@@ -135,8 +215,14 @@ define i64 @cntp_b8(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a) {
 
 define i64 @cntp_b16(<vscale x 8 x i1> %pg, <vscale x 8 x i1> %a) {
 ; CHECK-LABEL: cntp_b16:
-; CHECK: cntp x0, p0, p1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cntp x0, p0, p1.h
+; CHECK-NEXT:    ret
+;
+; USE_SCALAR_INC-LABEL: cntp_b16:
+; USE_SCALAR_INC:       // %bb.0:
+; USE_SCALAR_INC-NEXT:    cntp x0, p0, p1.h
+; USE_SCALAR_INC-NEXT:    ret
   %out = call i64 @llvm.aarch64.sve.cntp.nxv8i1(<vscale x 8 x i1> %pg,
                                                 <vscale x 8 x i1> %a)
   ret i64 %out
@@ -144,8 +230,14 @@ define i64 @cntp_b16(<vscale x 8 x i1> %pg, <vscale x 8 x i1> %a) {
 
 define i64 @cntp_b32(<vscale x 4 x i1> %pg, <vscale x 4 x i1> %a) {
 ; CHECK-LABEL: cntp_b32:
-; CHECK: cntp x0, p0, p1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cntp x0, p0, p1.s
+; CHECK-NEXT:    ret
+;
+; USE_SCALAR_INC-LABEL: cntp_b32:
+; USE_SCALAR_INC:       // %bb.0:
+; USE_SCALAR_INC-NEXT:    cntp x0, p0, p1.s
+; USE_SCALAR_INC-NEXT:    ret
   %out = call i64 @llvm.aarch64.sve.cntp.nxv4i1(<vscale x 4 x i1> %pg,
                                                 <vscale x 4 x i1> %a)
   ret i64 %out
@@ -153,13 +245,311 @@ define i64 @cntp_b32(<vscale x 4 x i1> %pg, <vscale x 4 x i1> %a) {
 
 define i64 @cntp_b64(<vscale x 2 x i1> %pg, <vscale x 2 x i1> %a) {
 ; CHECK-LABEL: cntp_b64:
-; CHECK: cntp x0, p0, p1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cntp x0, p0, p1.d
+; CHECK-NEXT:    ret
+;
+; USE_SCALAR_INC-LABEL: cntp_b64:
+; USE_SCALAR_INC:       // %bb.0:
+; USE_SCALAR_INC-NEXT:    cntp x0, p0, p1.d
+; USE_SCALAR_INC-NEXT:    ret
   %out = call i64 @llvm.aarch64.sve.cntp.nxv2i1(<vscale x 2 x i1> %pg,
                                                 <vscale x 2 x i1> %a)
   ret i64 %out
 }
 
+;
+; INCB
+;
+
+define i64 @incb(i64 %a) {
+; CHECK-LABEL: incb:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cntb x8, vl5
+; CHECK-NEXT:    add x0, x8, x0
+; CHECK-NEXT:    ret
+;
+; USE_SCALAR_INC-LABEL: incb:
+; USE_SCALAR_INC:       // %bb.0:
+; USE_SCALAR_INC-NEXT:    incb x0, vl5
+; USE_SCALAR_INC-NEXT:    ret
+  %cnt = call i64 @llvm.aarch64.sve.cntb(i32 5)
+  %out = add i64 %cnt, %a
+  ret i64 %out
+}
+
+define i64 @incb_mul(i64 %a) {
+; CHECK-LABEL: incb_mul:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cntb x8, vl4
+; CHECK-NEXT:    add x0, x0, x8, lsl #2
+; CHECK-NEXT:    ret
+;
+; USE_SCALAR_INC-LABEL: incb_mul:
+; USE_SCALAR_INC:       // %bb.0:
+; USE_SCALAR_INC-NEXT:    incb x0, vl4, mul #4
+; USE_SCALAR_INC-NEXT:    ret
+  %cnt = call i64 @llvm.aarch64.sve.cntb(i32 4)
+  %mul = mul i64 %cnt, 4
+  %out = add i64 %mul, %a
+  ret i64 %out
+}
+
+;
+; DECB
+;
+
+define i64 @decb(i64 %a) {
+; CHECK-LABEL: decb:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cntb x8, vl6
+; CHECK-NEXT:    sub x0, x0, x8
+; CHECK-NEXT:    ret
+;
+; USE_SCALAR_INC-LABEL: decb:
+; USE_SCALAR_INC:       // %bb.0:
+; USE_SCALAR_INC-NEXT:    decb x0, vl6
+; USE_SCALAR_INC-NEXT:    ret
+  %cnt = call i64 @llvm.aarch64.sve.cntb(i32 6)
+  %out = sub i64 %a, %cnt
+  ret i64 %out
+}
+
+define i64 @decb_mul(i64 %a) {
+; CHECK-LABEL: decb_mul:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cntb x8, vl7
+; CHECK-NEXT:    sub x0, x0, x8, lsl #3
+; CHECK-NEXT:    ret
+;
+; USE_SCALAR_INC-LABEL: decb_mul:
+; USE_SCALAR_INC:       // %bb.0:
+; USE_SCALAR_INC-NEXT:    decb x0, vl7, mul #8
+; USE_SCALAR_INC-NEXT:    ret
+  %cnt = call i64 @llvm.aarch64.sve.cntb(i32 7)
+  %mul = mul i64 %cnt, 8
+  %out = sub i64 %a, %mul
+  ret i64 %out
+}
+
+;
+; INCH
+;
+
+define i64 @inch(i64 %a) {
+; CHECK-LABEL: inch:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cnth x8, vl4
+; CHECK-NEXT:    add x0, x8, x0
+; CHECK-NEXT:    ret
+;
+; USE_SCALAR_INC-LABEL: inch:
+; USE_SCALAR_INC:       // %bb.0:
+; USE_SCALAR_INC-NEXT:    inch x0, vl4
+; USE_SCALAR_INC-NEXT:    ret
+  %cnt = call i64 @llvm.aarch64.sve.cnth(i32 4)
+  %out = add i64 %cnt, %a
+  ret i64 %out
+}
+
+define i64 @inch_mul(i64 %a) {
+; CHECK-LABEL: inch_mul:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cnth x8, vl8, mul #5
+; CHECK-NEXT:    add x0, x8, x0
+; CHECK-NEXT:    ret
+;
+; USE_SCALAR_INC-LABEL: inch_mul:
+; USE_SCALAR_INC:       // %bb.0:
+; USE_SCALAR_INC-NEXT:    inch x0, vl8, mul #5
+; USE_SCALAR_INC-NEXT:    ret
+  %cnt = call i64 @llvm.aarch64.sve.cnth(i32 8)
+  %mul = mul i64 %cnt, 5
+  %out = add i64 %mul, %a
+  ret i64 %out
+}
+
+;
+; DECH
+;
+
+define i64 @dech(i64 %a) {
+; CHECK-LABEL: dech:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cnth x8, vl1
+; CHECK-NEXT:    sub x0, x0, x8
+; CHECK-NEXT:    ret
+;
+; USE_SCALAR_INC-LABEL: dech:
+; USE_SCALAR_INC:       // %bb.0:
+; USE_SCALAR_INC-NEXT:    dech x0, vl1
+; USE_SCALAR_INC-NEXT:    ret
+  %cnt = call i64 @llvm.aarch64.sve.cnth(i32 1)
+  %out = sub i64 %a, %cnt
+  ret i64 %out
+}
+
+define i64 @dech_mul(i64 %a) {
+; CHECK-LABEL: dech_mul:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cnth x8, vl16, mul #7
+; CHECK-NEXT:    sub x0, x0, x8
+; CHECK-NEXT:    ret
+;
+; USE_SCALAR_INC-LABEL: dech_mul:
+; USE_SCALAR_INC:       // %bb.0:
+; USE_SCALAR_INC-NEXT:    dech x0, vl16, mul #7
+; USE_SCALAR_INC-NEXT:    ret
+  %cnt = call i64 @llvm.aarch64.sve.cnth(i32 9)
+  %mul = mul i64 %cnt, 7
+  %out = sub i64 %a, %mul
+  ret i64 %out
+}
+
+;
+; INCW
+;
+
+define i64 @incw(i64 %a) {
+; CHECK-LABEL: incw:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cntw x8, #16
+; CHECK-NEXT:    add x0, x8, x0
+; CHECK-NEXT:    ret
+;
+; USE_SCALAR_INC-LABEL: incw:
+; USE_SCALAR_INC:       // %bb.0:
+; USE_SCALAR_INC-NEXT:    incw x0, #16
+; USE_SCALAR_INC-NEXT:    ret
+  %cnt = call i64 @llvm.aarch64.sve.cntw(i32 16)
+  %out = add i64 %cnt, %a
+  ret i64 %out
+}
+
+define i64 @incw_mul(i64 %a) {
+; CHECK-LABEL: incw_mul:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cntw x8, vl32, mul #12
+; CHECK-NEXT:    add x0, x8, x0
+; CHECK-NEXT:    ret
+;
+; USE_SCALAR_INC-LABEL: incw_mul:
+; USE_SCALAR_INC:       // %bb.0:
+; USE_SCALAR_INC-NEXT:    incw x0, vl32, mul #12
+; USE_SCALAR_INC-NEXT:    ret
+  %cnt = call i64 @llvm.aarch64.sve.cntw(i32 10)
+  %mul = mul i64 %cnt, 12
+  %out = add i64 %mul, %a
+  ret i64 %out
+}
+
+;
+; DECW
+;
+
+define i64 @decw(i64 %a) {
+; CHECK-LABEL: decw:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cntw x8
+; CHECK-NEXT:    sub x0, x0, x8
+; CHECK-NEXT:    ret
+;
+; USE_SCALAR_INC-LABEL: decw:
+; USE_SCALAR_INC:       // %bb.0:
+; USE_SCALAR_INC-NEXT:    decw x0
+; USE_SCALAR_INC-NEXT:    ret
+  %cnt = call i64 @llvm.aarch64.sve.cntw(i32 31)
+  %out = sub i64 %a, %cnt
+  ret i64 %out
+}
+
+define i64 @decw_mul(i64 %a) {
+; CHECK-LABEL: decw_mul:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cntw x8, vl128
+; CHECK-NEXT:    sub x0, x0, x8, lsl #4
+; CHECK-NEXT:    ret
+;
+; USE_SCALAR_INC-LABEL: decw_mul:
+; USE_SCALAR_INC:       // %bb.0:
+; USE_SCALAR_INC-NEXT:    decw x0, vl128, mul #16
+; USE_SCALAR_INC-NEXT:    ret
+  %cnt = call i64 @llvm.aarch64.sve.cntw(i32 12)
+  %mul = mul i64 %cnt, 16
+  %out = sub i64 %a, %mul
+  ret i64 %out
+}
+
+define i64 @incd(i64 %a) {
+; CHECK-LABEL: incd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cntd x8, vl8
+; CHECK-NEXT:    add x0, x8, x0
+; CHECK-NEXT:    ret
+;
+; USE_SCALAR_INC-LABEL: incd:
+; USE_SCALAR_INC:       // %bb.0:
+; USE_SCALAR_INC-NEXT:    incd x0, vl8
+; USE_SCALAR_INC-NEXT:    ret
+  %cnt = call i64 @llvm.aarch64.sve.cntd(i32 8)
+  %out = add i64 %cnt, %a
+  ret i64 %out
+}
+
+define i64 @incd_mul(i64 %a) {
+; CHECK-LABEL: incd_mul:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cntd x8, all, mul #15
+; CHECK-NEXT:    add x0, x8, x0
+; CHECK-NEXT:    ret
+;
+; USE_SCALAR_INC-LABEL: incd_mul:
+; USE_SCALAR_INC:       // %bb.0:
+; USE_SCALAR_INC-NEXT:    incd x0, all, mul #15
+; USE_SCALAR_INC-NEXT:    ret
+  %cnt = call i64 @llvm.aarch64.sve.cntd(i32 31)
+  %mul = mul i64 %cnt, 15
+  %out = add i64 %mul, %a
+  ret i64 %out
+}
+
+;
+; DECD
+;
+
+define i64 @decd(i64 %a) {
+; CHECK-LABEL: decd:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cntd x8, #16
+; CHECK-NEXT:    sub x0, x0, x8
+; CHECK-NEXT:    ret
+;
+; USE_SCALAR_INC-LABEL: decd:
+; USE_SCALAR_INC:       // %bb.0:
+; USE_SCALAR_INC-NEXT:    decd x0, #16
+; USE_SCALAR_INC-NEXT:    ret
+  %cnt = call i64 @llvm.aarch64.sve.cntd(i32 16)
+  %out = sub i64 %a, %cnt
+  ret i64 %out
+}
+
+define i64 @decd_mul(i64 %a) {
+; CHECK-LABEL: decd_mul:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cntd x8, vl2, mul #9
+; CHECK-NEXT:    sub x0, x0, x8
+; CHECK-NEXT:    ret
+;
+; USE_SCALAR_INC-LABEL: decd_mul:
+; USE_SCALAR_INC:       // %bb.0:
+; USE_SCALAR_INC-NEXT:    decd x0, vl2, mul #9
+; USE_SCALAR_INC-NEXT:    ret
+  %cnt = call i64 @llvm.aarch64.sve.cntd(i32 2)
+  %mul = mul i64 %cnt, 9
+  %out = sub i64 %a, %mul
+  ret i64 %out
+}
+
 declare i64 @llvm.aarch64.sve.cntb(i32 %pattern)
 declare i64 @llvm.aarch64.sve.cnth(i32 %pattern)
 declare i64 @llvm.aarch64.sve.cntw(i32 %pattern)

diff --git a/llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll b/llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll
index 97ed67cc3d8bd..56d2ff25cb15e 100644
--- a/llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll
+++ b/llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll
@@ -23,15 +23,15 @@ define i8 @split_extract_32i8_idx(<vscale x 32 x i8> %a, i32 %idx) {
 ; CHECK-NEXT:    addvl sp, sp, #-2
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
 ; CHECK-NEXT:    .cfi_offset w29, -16
-; CHECK-NEXT:    rdvl x8, #2
+; CHECK-NEXT:    mov x8, #-1
 ; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sxtw x9, w0
-; CHECK-NEXT:    sub x8, x8, #1
 ; CHECK-NEXT:    mov x10, sp
-; CHECK-NEXT:    cmp x9, x8
 ; CHECK-NEXT:    ptrue p0.b
-; CHECK-NEXT:    csel x8, x9, x8, lo
+; CHECK-NEXT:    addvl x8, x8, #2
+; CHECK-NEXT:    cmp x9, x8
 ; CHECK-NEXT:    st1b { z1.b }, p0, [x10, #1, mul vl]
+; CHECK-NEXT:    csel x8, x9, x8, lo
 ; CHECK-NEXT:    st1b { z0.b }, p0, [sp]
 ; CHECK-NEXT:    ldrb w0, [x10, x8]
 ; CHECK-NEXT:    addvl sp, sp, #2
@@ -48,15 +48,15 @@ define i16 @split_extract_16i16_idx(<vscale x 16 x i16> %a, i32 %idx) {
 ; CHECK-NEXT:    addvl sp, sp, #-2
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
 ; CHECK-NEXT:    .cfi_offset w29, -16
-; CHECK-NEXT:    rdvl x8, #1
+; CHECK-NEXT:    mov x8, #-1
 ; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sxtw x9, w0
-; CHECK-NEXT:    sub x8, x8, #1
 ; CHECK-NEXT:    mov x10, sp
-; CHECK-NEXT:    cmp x9, x8
 ; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    csel x8, x9, x8, lo
+; CHECK-NEXT:    addvl x8, x8, #1
+; CHECK-NEXT:    cmp x9, x8
 ; CHECK-NEXT:    st1h { z1.h }, p0, [x10, #1, mul vl]
+; CHECK-NEXT:    csel x8, x9, x8, lo
 ; CHECK-NEXT:    st1h { z0.h }, p0, [sp]
 ; CHECK-NEXT:    ldrh w0, [x10, x8, lsl #1]
 ; CHECK-NEXT:    addvl sp, sp, #2
@@ -145,15 +145,15 @@ define i16 @split_extract_16i16(<vscale x 16 x i16> %a) {
 ; CHECK-NEXT:    addvl sp, sp, #-2
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
 ; CHECK-NEXT:    .cfi_offset w29, -16
-; CHECK-NEXT:    rdvl x8, #1
+; CHECK-NEXT:    mov x8, #-1
 ; CHECK-NEXT:    mov x9, sp
-; CHECK-NEXT:    sub x8, x8, #1
 ; CHECK-NEXT:    mov w10, #128
-; CHECK-NEXT:    cmp x8, #128
 ; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    csel x8, x8, x10, lo
+; CHECK-NEXT:    addvl x8, x8, #1
 ; CHECK-NEXT:    st1h { z1.h }, p0, [x9, #1, mul vl]
+; CHECK-NEXT:    cmp x8, #128
 ; CHECK-NEXT:    st1h { z0.h }, p0, [sp]
+; CHECK-NEXT:    csel x8, x8, x10, lo
 ; CHECK-NEXT:    ldrh w0, [x9, x8, lsl #1]
 ; CHECK-NEXT:    addvl sp, sp, #2
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -169,19 +169,19 @@ define i32 @split_extract_16i32(<vscale x 16 x i32> %a) {
 ; CHECK-NEXT:    addvl sp, sp, #-4
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG
 ; CHECK-NEXT:    .cfi_offset w29, -16
-; CHECK-NEXT:    rdvl x9, #1
+; CHECK-NEXT:    mov x8, #-1
 ; CHECK-NEXT:    mov w10, #34464
 ; CHECK-NEXT:    movk w10, #1, lsl #16
-; CHECK-NEXT:    sub x9, x9, #1
-; CHECK-NEXT:    mov x8, sp
-; CHECK-NEXT:    cmp x9, x10
-; CHECK-NEXT:    csel x9, x9, x10, lo
+; CHECK-NEXT:    mov x9, sp
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    st1w { z3.s }, p0, [x8, #3, mul vl]
-; CHECK-NEXT:    st1w { z2.s }, p0, [x8, #2, mul vl]
-; CHECK-NEXT:    st1w { z1.s }, p0, [x8, #1, mul vl]
+; CHECK-NEXT:    addvl x8, x8, #1
+; CHECK-NEXT:    cmp x8, x10
+; CHECK-NEXT:    st1w { z3.s }, p0, [x9, #3, mul vl]
+; CHECK-NEXT:    csel x8, x8, x10, lo
+; CHECK-NEXT:    st1w { z2.s }, p0, [x9, #2, mul vl]
+; CHECK-NEXT:    st1w { z1.s }, p0, [x9, #1, mul vl]
 ; CHECK-NEXT:    st1w { z0.s }, p0, [sp]
-; CHECK-NEXT:    ldr w0, [x8, x9, lsl #2]
+; CHECK-NEXT:    ldr w0, [x9, x8, lsl #2]
 ; CHECK-NEXT:    addvl sp, sp, #4
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret

diff --git a/llvm/test/CodeGen/AArch64/sve-split-insert-elt.ll b/llvm/test/CodeGen/AArch64/sve-split-insert-elt.ll
index 8f4ba66f208ac..0b9baa23a11fc 100644
--- a/llvm/test/CodeGen/AArch64/sve-split-insert-elt.ll
+++ b/llvm/test/CodeGen/AArch64/sve-split-insert-elt.ll
@@ -23,14 +23,14 @@ define <vscale x 32 x i8> @split_insert_32i8_idx(<vscale x 32 x i8> %a, i8 %elt,
 ; CHECK-NEXT:    addvl sp, sp, #-2
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
 ; CHECK-NEXT:    .cfi_offset w29, -16
-; CHECK-NEXT:    rdvl x8, #2
+; CHECK-NEXT:    mov x8, #-1
 ; CHECK-NEXT:    mov x9, sp
-; CHECK-NEXT:    sub x8, x8, #1
 ; CHECK-NEXT:    ptrue p0.b
-; CHECK-NEXT:    cmp x1, x8
-; CHECK-NEXT:    csel x8, x1, x8, lo
+; CHECK-NEXT:    addvl x8, x8, #2
 ; CHECK-NEXT:    st1b { z1.b }, p0, [x9, #1, mul vl]
+; CHECK-NEXT:    cmp x1, x8
 ; CHECK-NEXT:    st1b { z0.b }, p0, [sp]
+; CHECK-NEXT:    csel x8, x1, x8, lo
 ; CHECK-NEXT:    strb w0, [x9, x8]
 ; CHECK-NEXT:    ld1b { z1.b }, p0/z, [x9, #1, mul vl]
 ; CHECK-NEXT:    ld1b { z0.b }, p0/z, [sp]
@@ -135,14 +135,14 @@ define <vscale x 32 x i16> @split_insert_32i16(<vscale x 32 x i16> %a, i16 %elt)
 ; CHECK-NEXT:    addvl sp, sp, #-4
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG
 ; CHECK-NEXT:    .cfi_offset w29, -16
-; CHECK-NEXT:    rdvl x8, #2
+; CHECK-NEXT:    mov x8, #-1
 ; CHECK-NEXT:    mov w10, #128
-; CHECK-NEXT:    sub x8, x8, #1
 ; CHECK-NEXT:    mov x9, sp
-; CHECK-NEXT:    cmp x8, #128
 ; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    csel x8, x8, x10, lo
+; CHECK-NEXT:    addvl x8, x8, #2
+; CHECK-NEXT:    cmp x8, #128
 ; CHECK-NEXT:    st1h { z3.h }, p0, [x9, #3, mul vl]
+; CHECK-NEXT:    csel x8, x8, x10, lo
 ; CHECK-NEXT:    st1h { z2.h }, p0, [x9, #2, mul vl]
 ; CHECK-NEXT:    st1h { z1.h }, p0, [x9, #1, mul vl]
 ; CHECK-NEXT:    st1h { z0.h }, p0, [sp]

diff --git a/llvm/test/CodeGen/AArch64/sve-stepvector.ll b/llvm/test/CodeGen/AArch64/sve-stepvector.ll
index 71e69e33d8c66..8dd8562670ccd 100644
--- a/llvm/test/CodeGen/AArch64/sve-stepvector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-stepvector.ll
@@ -48,10 +48,9 @@ entry:
 define <vscale x 4 x i64> @stepvector_nxv4i64() {
 ; CHECK-LABEL: stepvector_nxv4i64:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    cntd x8
 ; CHECK-NEXT:    index z0.d, #0, #1
-; CHECK-NEXT:    mov z1.d, x8
-; CHECK-NEXT:    add z1.d, z0.d, z1.d
+; CHECK-NEXT:    mov z1.d, z0.d
+; CHECK-NEXT:    incd z1.d
 ; CHECK-NEXT:    ret
 entry:
   %0 = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
@@ -61,14 +60,13 @@ entry:
 define <vscale x 16 x i32> @stepvector_nxv16i32() {
 ; CHECK-LABEL: stepvector_nxv16i32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    cntw x9
-; CHECK-NEXT:    cnth x8
 ; CHECK-NEXT:    index z0.s, #0, #1
-; CHECK-NEXT:    mov z1.s, w9
-; CHECK-NEXT:    mov z3.s, w8
-; CHECK-NEXT:    add z1.s, z0.s, z1.s
-; CHECK-NEXT:    add z2.s, z0.s, z3.s
-; CHECK-NEXT:    add z3.s, z1.s, z3.s
+; CHECK-NEXT:    mov z1.d, z0.d
+; CHECK-NEXT:    mov z2.d, z0.d
+; CHECK-NEXT:    incw z1.s
+; CHECK-NEXT:    incw z2.s, all, mul #2
+; CHECK-NEXT:    mov z3.d, z1.d
+; CHECK-NEXT:    incw z3.s, all, mul #2
 ; CHECK-NEXT:    ret
 entry:
   %0 = call <vscale x 16 x i32> @llvm.experimental.stepvector.nxv16i32()

diff --git a/llvm/test/CodeGen/AArch64/sve-vl-arith.ll b/llvm/test/CodeGen/AArch64/sve-vl-arith.ll
new file mode 100644
index 0000000000000..6323850515322
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-vl-arith.ll
@@ -0,0 +1,425 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -verify-machineinstrs < %s | FileCheck %s -check-prefix=NO_SCALAR_INC
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+use-scalar-inc-vl -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 -verify-machineinstrs < %s | FileCheck %s
+
+define <vscale x 8 x i16> @inch_vec(<vscale x 8 x i16> %a) {
+; NO_SCALAR_INC-LABEL: inch_vec:
+; NO_SCALAR_INC:       // %bb.0:
+; NO_SCALAR_INC-NEXT:    inch z0.h
+; NO_SCALAR_INC-NEXT:    ret
+;
+; CHECK-LABEL: inch_vec:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    inch z0.h
+; CHECK-NEXT:    ret
+  %vscale = call i16 @llvm.vscale.i16()
+  %mul = mul i16 %vscale, 8
+  %vl = insertelement <vscale x 8 x i16> undef, i16 %mul, i32 0
+  %vl.splat = shufflevector <vscale x 8 x i16> %vl, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
+  %res = add <vscale x 8 x i16> %a, %vl.splat
+  ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 4 x i32> @incw_vec(<vscale x 4 x i32> %a) {
+; NO_SCALAR_INC-LABEL: incw_vec:
+; NO_SCALAR_INC:       // %bb.0:
+; NO_SCALAR_INC-NEXT:    incw z0.s
+; NO_SCALAR_INC-NEXT:    ret
+;
+; CHECK-LABEL: incw_vec:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    incw z0.s
+; CHECK-NEXT:    ret
+  %vscale = call i32 @llvm.vscale.i32()
+  %mul = mul i32 %vscale, 4
+  %vl = insertelement <vscale x 4 x i32> undef, i32 %mul, i32 0
+  %vl.splat = shufflevector <vscale x 4 x i32> %vl, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+  %res = add <vscale x 4 x i32> %a, %vl.splat
+  ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 2 x i64> @incd_vec(<vscale x 2 x i64> %a) {
+; NO_SCALAR_INC-LABEL: incd_vec:
+; NO_SCALAR_INC:       // %bb.0:
+; NO_SCALAR_INC-NEXT:    incd z0.d
+; NO_SCALAR_INC-NEXT:    ret
+;
+; CHECK-LABEL: incd_vec:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    incd z0.d
+; CHECK-NEXT:    ret
+  %vscale = call i64 @llvm.vscale.i64()
+  %mul = mul i64 %vscale, 2
+  %vl = insertelement <vscale x 2 x i64> undef, i64 %mul, i32 0
+  %vl.splat = shufflevector <vscale x 2 x i64> %vl, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+  %res = add <vscale x 2 x i64> %a, %vl.splat
+  ret <vscale x 2 x i64> %res
+}
+
+define <vscale x 8 x i16> @dech_vec(<vscale x 8 x i16> %a) {
+; NO_SCALAR_INC-LABEL: dech_vec:
+; NO_SCALAR_INC:       // %bb.0:
+; NO_SCALAR_INC-NEXT:    dech z0.h, all, mul #2
+; NO_SCALAR_INC-NEXT:    ret
+;
+; CHECK-LABEL: dech_vec:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    dech z0.h, all, mul #2
+; CHECK-NEXT:    ret
+  %vscale = call i16 @llvm.vscale.i16()
+  %mul = mul i16 %vscale, 16
+  %vl = insertelement <vscale x 8 x i16> undef, i16 %mul, i32 0
+  %vl.splat = shufflevector <vscale x 8 x i16> %vl, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
+  %res = sub <vscale x 8 x i16> %a, %vl.splat
+  ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 4 x i32> @decw_vec(<vscale x 4 x i32> %a) {
+; NO_SCALAR_INC-LABEL: decw_vec:
+; NO_SCALAR_INC:       // %bb.0:
+; NO_SCALAR_INC-NEXT:    decw z0.s, all, mul #4
+; NO_SCALAR_INC-NEXT:    ret
+;
+; CHECK-LABEL: decw_vec:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    decw z0.s, all, mul #4
+; CHECK-NEXT:    ret
+  %vscale = call i32 @llvm.vscale.i32()
+  %mul = mul i32 %vscale, 16
+  %vl = insertelement <vscale x 4 x i32> undef, i32 %mul, i32 0
+  %vl.splat = shufflevector <vscale x 4 x i32> %vl, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+  %res = sub <vscale x 4 x i32> %a, %vl.splat
+  ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 2 x i64> @decd_vec(<vscale x 2 x i64> %a) {
+; NO_SCALAR_INC-LABEL: decd_vec:
+; NO_SCALAR_INC:       // %bb.0:
+; NO_SCALAR_INC-NEXT:    decd z0.d, all, mul #8
+; NO_SCALAR_INC-NEXT:    ret
+;
+; CHECK-LABEL: decd_vec:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    decd z0.d, all, mul #8
+; CHECK-NEXT:    ret
+  %vscale = call i64 @llvm.vscale.i64()
+  %mul = mul i64 %vscale, 16
+  %vl = insertelement <vscale x 2 x i64> undef, i64 %mul, i32 0
+  %vl.splat = shufflevector <vscale x 2 x i64> %vl, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+  %res = sub <vscale x 2 x i64> %a, %vl.splat
+  ret <vscale x 2 x i64> %res
+}
+
+; NOTE: As there is no need for the predicate pattern we
+; fall back to using ADDVL with its larger immediate range.
+define i64 @incb_scalar_i64(i64 %a) {
+; NO_SCALAR_INC-LABEL: incb_scalar_i64:
+; NO_SCALAR_INC:       // %bb.0:
+; NO_SCALAR_INC-NEXT:    addvl x0, x0, #1
+; NO_SCALAR_INC-NEXT:    ret
+;
+; CHECK-LABEL: incb_scalar_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    addvl x0, x0, #1
+; CHECK-NEXT:    ret
+  %vscale = call i64 @llvm.vscale.i64()
+  %mul = mul i64 %vscale, 16
+  %add = add i64 %a, %mul
+  ret i64 %add
+}
+
+define i64 @inch_scalar_i64(i64 %a) {
+; NO_SCALAR_INC-LABEL: inch_scalar_i64:
+; NO_SCALAR_INC:       // %bb.0:
+; NO_SCALAR_INC-NEXT:    cnth x8
+; NO_SCALAR_INC-NEXT:    add x0, x0, x8
+; NO_SCALAR_INC-NEXT:    ret
+;
+; CHECK-LABEL: inch_scalar_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    inch x0
+; CHECK-NEXT:    ret
+  %vscale = call i64 @llvm.vscale.i64()
+  %mul = mul i64 %vscale, 8
+  %add = add i64 %a, %mul
+  ret i64 %add
+}
+
+define i64 @incw_scalar_i64(i64 %a) {
+; NO_SCALAR_INC-LABEL: incw_scalar_i64:
+; NO_SCALAR_INC:       // %bb.0:
+; NO_SCALAR_INC-NEXT:    cntw x8
+; NO_SCALAR_INC-NEXT:    add x0, x0, x8
+; NO_SCALAR_INC-NEXT:    ret
+;
+; CHECK-LABEL: incw_scalar_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    incw x0
+; CHECK-NEXT:    ret
+  %vscale = call i64 @llvm.vscale.i64()
+  %mul = mul i64 %vscale, 4
+  %add = add i64 %a, %mul
+  ret i64 %add
+}
+
+define i64 @incd_scalar_i64(i64 %a) {
+; NO_SCALAR_INC-LABEL: incd_scalar_i64:
+; NO_SCALAR_INC:       // %bb.0:
+; NO_SCALAR_INC-NEXT:    cntd x8
+; NO_SCALAR_INC-NEXT:    add x0, x0, x8
+; NO_SCALAR_INC-NEXT:    ret
+;
+; CHECK-LABEL: incd_scalar_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    incd x0
+; CHECK-NEXT:    ret
+  %vscale = call i64 @llvm.vscale.i64()
+  %mul = mul i64 %vscale, 2
+  %add = add i64 %a, %mul
+  ret i64 %add
+}
+
+; NOTE: As there is no need for the predicate pattern we
+; fall back to using ADDVL with its larger immediate range.
+define i64 @decb_scalar_i64(i64 %a) {
+; NO_SCALAR_INC-LABEL: decb_scalar_i64:
+; NO_SCALAR_INC:       // %bb.0:
+; NO_SCALAR_INC-NEXT:    addvl x0, x0, #-2
+; NO_SCALAR_INC-NEXT:    ret
+;
+; CHECK-LABEL: decb_scalar_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    addvl x0, x0, #-2
+; CHECK-NEXT:    ret
+  %vscale = call i64 @llvm.vscale.i64()
+  %mul = mul i64 %vscale, 32
+  %sub = sub i64 %a, %mul
+  ret i64 %sub
+}
+
+define i64 @dech_scalar_i64(i64 %a) {
+; NO_SCALAR_INC-LABEL: dech_scalar_i64:
+; NO_SCALAR_INC:       // %bb.0:
+; NO_SCALAR_INC-NEXT:    cnth x8, all, mul #3
+; NO_SCALAR_INC-NEXT:    neg x8, x8
+; NO_SCALAR_INC-NEXT:    add x0, x0, x8
+; NO_SCALAR_INC-NEXT:    ret
+;
+; CHECK-LABEL: dech_scalar_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    dech x0, all, mul #3
+; CHECK-NEXT:    ret
+  %vscale = call i64 @llvm.vscale.i64()
+  %mul = mul i64 %vscale, 24
+  %sub = sub i64 %a, %mul
+  ret i64 %sub
+}
+
+define i64 @decw_scalar_i64(i64 %a) {
+; NO_SCALAR_INC-LABEL: decw_scalar_i64:
+; NO_SCALAR_INC:       // %bb.0:
+; NO_SCALAR_INC-NEXT:    cntw x8, all, mul #3
+; NO_SCALAR_INC-NEXT:    neg x8, x8
+; NO_SCALAR_INC-NEXT:    add x0, x0, x8
+; NO_SCALAR_INC-NEXT:    ret
+;
+; CHECK-LABEL: decw_scalar_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    decw x0, all, mul #3
+; CHECK-NEXT:    ret
+  %vscale = call i64 @llvm.vscale.i64()
+  %mul = mul i64 %vscale, 12
+  %sub = sub i64 %a, %mul
+  ret i64 %sub
+}
+
+define i64 @decd_scalar_i64(i64 %a) {
+; NO_SCALAR_INC-LABEL: decd_scalar_i64:
+; NO_SCALAR_INC:       // %bb.0:
+; NO_SCALAR_INC-NEXT:    cntd x8, all, mul #3
+; NO_SCALAR_INC-NEXT:    neg x8, x8
+; NO_SCALAR_INC-NEXT:    add x0, x0, x8
+; NO_SCALAR_INC-NEXT:    ret
+;
+; CHECK-LABEL: decd_scalar_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    decd x0, all, mul #3
+; CHECK-NEXT:    ret
+  %vscale = call i64 @llvm.vscale.i64()
+  %mul = mul i64 %vscale, 6
+  %sub = sub i64 %a, %mul
+  ret i64 %sub
+}
+
+; NOTE: As there is no need for the predicate pattern we
+; fall back to using ADDVL with its larger immediate range.
+define i32 @incb_scalar_i32(i32 %a) {
+; NO_SCALAR_INC-LABEL: incb_scalar_i32:
+; NO_SCALAR_INC:       // %bb.0:
+; NO_SCALAR_INC-NEXT:    // kill: def $w0 killed $w0 def $x0
+; NO_SCALAR_INC-NEXT:    addvl x0, x0, #3
+; NO_SCALAR_INC-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; NO_SCALAR_INC-NEXT:    ret
+
+; CHECK-LABEL: incb_scalar_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    addvl x0, x0, #3
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT:    ret
+  %vscale = call i64 @llvm.vscale.i64()
+  %mul = mul i64 %vscale, 48
+  %vl = trunc i64 %mul to i32
+  %add = add i32 %a, %vl
+  ret i32 %add
+}
+
+define i32 @inch_scalar_i32(i32 %a) {
+; NO_SCALAR_INC-LABEL: inch_scalar_i32:
+; NO_SCALAR_INC:       // %bb.0:
+; NO_SCALAR_INC-NEXT:    cnth x8, all, mul #7
+; NO_SCALAR_INC-NEXT:    add w0, w0, w8
+; NO_SCALAR_INC-NEXT:    ret
+
+; CHECK-LABEL: inch_scalar_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    inch x0, all, mul #7
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT:    ret
+  %vscale = call i64 @llvm.vscale.i64()
+  %mul = mul i64 %vscale, 56
+  %vl = trunc i64 %mul to i32
+  %add = add i32 %a, %vl
+  ret i32 %add
+}
+
+define i32 @incw_scalar_i32(i32 %a) {
+; NO_SCALAR_INC-LABEL: incw_scalar_i32:
+; NO_SCALAR_INC:       // %bb.0:
+; NO_SCALAR_INC-NEXT:    cntw x8, all, mul #7
+; NO_SCALAR_INC-NEXT:    add w0, w0, w8
+; NO_SCALAR_INC-NEXT:    ret
+
+; CHECK-LABEL: incw_scalar_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    incw x0, all, mul #7
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT:    ret
+  %vscale = call i64 @llvm.vscale.i64()
+  %mul = mul i64 %vscale, 28
+  %vl = trunc i64 %mul to i32
+  %add = add i32 %a, %vl
+  ret i32 %add
+}
+
+define i32 @incd_scalar_i32(i32 %a) {
+; NO_SCALAR_INC-LABEL: incd_scalar_i32:
+; NO_SCALAR_INC:       // %bb.0:
+; NO_SCALAR_INC-NEXT:    cntd x8, all, mul #7
+; NO_SCALAR_INC-NEXT:    add w0, w0, w8
+; NO_SCALAR_INC-NEXT:    ret
+
+; CHECK-LABEL: incd_scalar_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    incd x0, all, mul #7
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT:    ret
+  %vscale = call i64 @llvm.vscale.i64()
+  %mul = mul i64 %vscale, 14
+  %vl = trunc i64 %mul to i32
+  %add = add i32 %a, %vl
+  ret i32 %add
+}
+
+; NOTE: As there is no need for the predicate pattern we
+; fall back to using ADDVL with its larger immediate range.
+define i32 @decb_scalar_i32(i32 %a) {
+; NO_SCALAR_INC-LABEL: decb_scalar_i32:
+; NO_SCALAR_INC:       // %bb.0:
+; NO_SCALAR_INC-NEXT:    // kill: def $w0 killed $w0 def $x0
+; NO_SCALAR_INC-NEXT:    addvl x0, x0, #-4
+; NO_SCALAR_INC-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; NO_SCALAR_INC-NEXT:    ret
+
+; CHECK-LABEL: decb_scalar_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    addvl x0, x0, #-4
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT:    ret
+  %vscale = call i64 @llvm.vscale.i64()
+  %mul = mul i64 %vscale, 64
+  %vl = trunc i64 %mul to i32
+  %sub = sub i32 %a, %vl
+  ret i32 %sub
+}
+
+define i32 @dech_scalar_i32(i32 %a) {
+; NO_SCALAR_INC-LABEL: dech_scalar_i32:
+; NO_SCALAR_INC:       // %bb.0:
+; NO_SCALAR_INC-NEXT:    cnth x8
+; NO_SCALAR_INC-NEXT:    neg x8, x8
+; NO_SCALAR_INC-NEXT:    add w0, w0, w8
+; NO_SCALAR_INC-NEXT:    ret
+
+; CHECK-LABEL: dech_scalar_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    dech x0
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT:    ret
+  %vscale = call i64 @llvm.vscale.i64()
+  %mul = mul i64 %vscale, 8
+  %vl = trunc i64 %mul to i32
+  %sub = sub i32 %a, %vl
+  ret i32 %sub
+}
+
+define i32 @decw_scalar_i32(i32 %a) {
+; NO_SCALAR_INC-LABEL: decw_scalar_i32:
+; NO_SCALAR_INC:       // %bb.0:
+; NO_SCALAR_INC-NEXT:    cntw x8
+; NO_SCALAR_INC-NEXT:    neg x8, x8
+; NO_SCALAR_INC-NEXT:    add w0, w0, w8
+; NO_SCALAR_INC-NEXT:    ret
+
+; CHECK-LABEL: decw_scalar_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    decw x0
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT:    ret
+  %vscale = call i64 @llvm.vscale.i64()
+  %mul = mul i64 %vscale, 4
+  %vl = trunc i64 %mul to i32
+  %sub = sub i32 %a, %vl
+  ret i32 %sub
+}
+
+define i32 @decd_scalar_i32(i32 %a) {
+; NO_SCALAR_INC-LABEL: decd_scalar_i32:
+; NO_SCALAR_INC:       // %bb.0:
+; NO_SCALAR_INC-NEXT:    cntd x8
+; NO_SCALAR_INC-NEXT:    neg x8, x8
+; NO_SCALAR_INC-NEXT:    add w0, w0, w8
+; NO_SCALAR_INC-NEXT:    ret
+;
+; CHECK-LABEL: decd_scalar_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    decd x0
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT:    ret
+  %vscale = call i64 @llvm.vscale.i64()
+  %mul = mul i64 %vscale, 2
+  %vl = trunc i64 %mul to i32
+  %sub = sub i32 %a, %vl
+  ret i32 %sub
+}
+
+declare i16 @llvm.vscale.i16()
+declare i32 @llvm.vscale.i32()
+declare i64 @llvm.vscale.i64()


        

