[llvm] [AArch64][SVE2] Lower read-after-write mask to whilerw (PR #114028)

Sam Tebbs via llvm-commits llvm-commits at lists.llvm.org
Tue Oct 29 08:11:45 PDT 2024


https://github.com/SamTebbs33 updated https://github.com/llvm/llvm-project/pull/114028

>From fea72a0fff1bfc0f0a950facf0e7e5404fedc0c0 Mon Sep 17 00:00:00 2001
From: Sam Tebbs <samuel.tebbs at arm.com>
Date: Tue, 29 Oct 2024 10:05:07 +0000
Subject: [PATCH] [AArch64][SVE2] Lower read-after-write mask to whilerw

This patch extends the whilewr matching to also match a read-after-write
mask and lower it to a whilerw.
---
 .../Target/AArch64/AArch64ISelLowering.cpp    |  34 ++-
 llvm/test/CodeGen/AArch64/whilewr.ll          | 261 +++++++++++++-----
 2 files changed, 223 insertions(+), 72 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index bf2f0674b5b65e..a2517761afc0c9 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -14189,7 +14189,16 @@ SDValue tryWhileWRFromOR(SDValue Op, SelectionDAG &DAG,
     return SDValue();
 
   SDValue Diff = Cmp.getOperand(0);
-  if (Diff.getOpcode() != ISD::SUB || Diff.getValueType() != MVT::i64)
+  SDValue NonAbsDiff = Diff;
+  bool WriteAfterRead = true;
+  // A read-after-write will have an abs call on the diff
+  if (Diff.getOpcode() == ISD::ABS) {
+    NonAbsDiff = Diff.getOperand(0);
+    WriteAfterRead = false;
+  }
+
+  if (NonAbsDiff.getOpcode() != ISD::SUB ||
+      NonAbsDiff.getValueType() != MVT::i64)
     return SDValue();
 
   if (!isNullConstant(LaneMask.getOperand(1)) ||
@@ -14210,8 +14219,13 @@ SDValue tryWhileWRFromOR(SDValue Op, SelectionDAG &DAG,
       // it's positive, otherwise the difference plus the element size if it's
       // negative: pos_diff = diff < 0 ? (diff + 7) : diff
       SDValue Select = DiffDiv.getOperand(0);
+      SDValue SelectOp3 = Select.getOperand(3);
+      // Check for an abs in the case of a read-after-write
+      if (!WriteAfterRead && SelectOp3.getOpcode() == ISD::ABS)
+        SelectOp3 = SelectOp3.getOperand(0);
+
       // Make sure the difference is being compared by the select
-      if (Select.getOpcode() != ISD::SELECT_CC || Select.getOperand(3) != Diff)
+      if (Select.getOpcode() != ISD::SELECT_CC || SelectOp3 != NonAbsDiff)
         return SDValue();
       // Make sure it's checking if the difference is less than 0
       if (!isNullConstant(Select.getOperand(1)) ||
@@ -14243,22 +14257,26 @@ SDValue tryWhileWRFromOR(SDValue Op, SelectionDAG &DAG,
   } else if (LaneMask.getOperand(2) != Diff)
     return SDValue();
 
-  SDValue StorePtr = Diff.getOperand(0);
-  SDValue ReadPtr = Diff.getOperand(1);
+  SDValue StorePtr = NonAbsDiff.getOperand(0);
+  SDValue ReadPtr = NonAbsDiff.getOperand(1);
 
   unsigned IntrinsicID = 0;
   switch (EltSize) {
   case 1:
-    IntrinsicID = Intrinsic::aarch64_sve_whilewr_b;
+    IntrinsicID = WriteAfterRead ? Intrinsic::aarch64_sve_whilewr_b
+                                 : Intrinsic::aarch64_sve_whilerw_b;
     break;
   case 2:
-    IntrinsicID = Intrinsic::aarch64_sve_whilewr_h;
+    IntrinsicID = WriteAfterRead ? Intrinsic::aarch64_sve_whilewr_h
+                                 : Intrinsic::aarch64_sve_whilerw_h;
     break;
   case 4:
-    IntrinsicID = Intrinsic::aarch64_sve_whilewr_s;
+    IntrinsicID = WriteAfterRead ? Intrinsic::aarch64_sve_whilewr_s
+                                 : Intrinsic::aarch64_sve_whilerw_s;
     break;
   case 8:
-    IntrinsicID = Intrinsic::aarch64_sve_whilewr_d;
+    IntrinsicID = WriteAfterRead ? Intrinsic::aarch64_sve_whilewr_d
+                                 : Intrinsic::aarch64_sve_whilerw_d;
     break;
   default:
     return SDValue();
diff --git a/llvm/test/CodeGen/AArch64/whilewr.ll b/llvm/test/CodeGen/AArch64/whilewr.ll
index 9f1ea850792384..ec59e42feb6a4f 100644
--- a/llvm/test/CodeGen/AArch64/whilewr.ll
+++ b/llvm/test/CodeGen/AArch64/whilewr.ll
@@ -30,6 +30,36 @@ entry:
   ret <vscale x 16 x i1> %active.lane.mask.alias
 }
 
+define <vscale x 16 x i1> @whilerw_8(ptr noalias %a, ptr %b, ptr %c, i32 %n) {
+; CHECK-LABEL: whilerw_8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    whilerw p0.b, x2, x1
+; CHECK-NEXT:    ret
+;
+; CHECK-NOSVE2-LABEL: whilerw_8:
+; CHECK-NOSVE2:       // %bb.0: // %entry
+; CHECK-NOSVE2-NEXT:    subs x8, x2, x1
+; CHECK-NOSVE2-NEXT:    cneg x8, x8, mi
+; CHECK-NOSVE2-NEXT:    cmp x8, #0
+; CHECK-NOSVE2-NEXT:    cset w9, lt
+; CHECK-NOSVE2-NEXT:    whilelo p0.b, xzr, x8
+; CHECK-NOSVE2-NEXT:    sbfx x8, x9, #0, #1
+; CHECK-NOSVE2-NEXT:    whilelo p1.b, xzr, x8
+; CHECK-NOSVE2-NEXT:    sel p0.b, p0, p0.b, p1.b
+; CHECK-NOSVE2-NEXT:    ret
+entry:
+  %b24 = ptrtoint ptr %b to i64
+  %c25 = ptrtoint ptr %c to i64
+  %sub.diff = sub i64 %c25, %b24
+  %0 = tail call i64 @llvm.abs.i64(i64 %sub.diff, i1 false)
+  %neg.compare = icmp slt i64 %0, 0
+  %.splatinsert = insertelement <vscale x 16 x i1> poison, i1 %neg.compare, i64 0
+  %.splat = shufflevector <vscale x 16 x i1> %.splatinsert, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+  %ptr.diff.lane.mask = tail call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 %0)
+  %active.lane.mask.alias = or <vscale x 16 x i1> %ptr.diff.lane.mask, %.splat
+  ret <vscale x 16 x i1> %active.lane.mask.alias
+}
+
 define <vscale x 16 x i1> @whilewr_commutative(ptr noalias %a, ptr %b, ptr %c, i32 %n) {
 ; CHECK-LABEL: whilewr_commutative:
 ; CHECK:       // %bb.0: // %entry
@@ -89,6 +119,39 @@ entry:
   ret <vscale x 8 x i1> %active.lane.mask.alias
 }
 
+define <vscale x 8 x i1> @whilerw_16(ptr noalias %a, ptr %b, ptr %c, i32 %n) {
+; CHECK-LABEL: whilerw_16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    whilerw p0.h, x2, x1
+; CHECK-NEXT:    ret
+;
+; CHECK-NOSVE2-LABEL: whilerw_16:
+; CHECK-NOSVE2:       // %bb.0: // %entry
+; CHECK-NOSVE2-NEXT:    subs x8, x2, x1
+; CHECK-NOSVE2-NEXT:    cneg x8, x8, mi
+; CHECK-NOSVE2-NEXT:    cmn x8, #1
+; CHECK-NOSVE2-NEXT:    add x8, x8, x8, lsr #63
+; CHECK-NOSVE2-NEXT:    cset w9, lt
+; CHECK-NOSVE2-NEXT:    sbfx x9, x9, #0, #1
+; CHECK-NOSVE2-NEXT:    asr x8, x8, #1
+; CHECK-NOSVE2-NEXT:    whilelo p0.h, xzr, x9
+; CHECK-NOSVE2-NEXT:    whilelo p1.h, xzr, x8
+; CHECK-NOSVE2-NEXT:    mov p0.b, p1/m, p1.b
+; CHECK-NOSVE2-NEXT:    ret
+entry:
+  %b24 = ptrtoint ptr %b to i64
+  %c25 = ptrtoint ptr %c to i64
+  %sub.diff = sub i64 %c25, %b24
+  %0 = tail call i64 @llvm.abs.i64(i64 %sub.diff, i1 false)
+  %diff = sdiv i64 %0, 2
+  %neg.compare = icmp slt i64 %0, -1
+  %.splatinsert = insertelement <vscale x 8 x i1> poison, i1 %neg.compare, i64 0
+  %.splat = shufflevector <vscale x 8 x i1> %.splatinsert, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+  %ptr.diff.lane.mask = tail call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 %diff)
+  %active.lane.mask.alias = or <vscale x 8 x i1> %ptr.diff.lane.mask, %.splat
+  ret <vscale x 8 x i1> %active.lane.mask.alias
+}
+
 define <vscale x 4 x i1> @whilewr_32(ptr noalias %a, ptr %b, ptr %c, i32 %n) {
 ; CHECK-LABEL: whilewr_32:
 ; CHECK:       // %bb.0: // %entry
@@ -122,6 +185,41 @@ entry:
   ret <vscale x 4 x i1> %active.lane.mask.alias
 }
 
+define <vscale x 4 x i1> @whilerw_32(ptr noalias %a, ptr %b, ptr %c, i32 %n) {
+; CHECK-LABEL: whilerw_32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    whilerw p0.s, x2, x1
+; CHECK-NEXT:    ret
+;
+; CHECK-NOSVE2-LABEL: whilerw_32:
+; CHECK-NOSVE2:       // %bb.0: // %entry
+; CHECK-NOSVE2-NEXT:    subs x8, x2, x1
+; CHECK-NOSVE2-NEXT:    cneg x8, x8, mi
+; CHECK-NOSVE2-NEXT:    add x9, x8, #3
+; CHECK-NOSVE2-NEXT:    cmp x8, #0
+; CHECK-NOSVE2-NEXT:    csel x9, x9, x8, lt
+; CHECK-NOSVE2-NEXT:    cmn x8, #3
+; CHECK-NOSVE2-NEXT:    cset w8, lt
+; CHECK-NOSVE2-NEXT:    asr x9, x9, #2
+; CHECK-NOSVE2-NEXT:    sbfx x8, x8, #0, #1
+; CHECK-NOSVE2-NEXT:    whilelo p1.s, xzr, x9
+; CHECK-NOSVE2-NEXT:    whilelo p0.s, xzr, x8
+; CHECK-NOSVE2-NEXT:    mov p0.b, p1/m, p1.b
+; CHECK-NOSVE2-NEXT:    ret
+entry:
+  %b24 = ptrtoint ptr %b to i64
+  %c25 = ptrtoint ptr %c to i64
+  %sub.diff = sub i64 %c25, %b24
+  %0 = tail call i64 @llvm.abs.i64(i64 %sub.diff, i1 false)
+  %diff = sdiv i64 %0, 4
+  %neg.compare = icmp slt i64 %0, -3
+  %.splatinsert = insertelement <vscale x 4 x i1> poison, i1 %neg.compare, i64 0
+  %.splat = shufflevector <vscale x 4 x i1> %.splatinsert, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+  %ptr.diff.lane.mask = tail call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 %diff)
+  %active.lane.mask.alias = or <vscale x 4 x i1> %ptr.diff.lane.mask, %.splat
+  ret <vscale x 4 x i1> %active.lane.mask.alias
+}
+
 define <vscale x 2 x i1> @whilewr_64(ptr noalias %a, ptr %b, ptr %c, i32 %n) {
 ; CHECK-LABEL: whilewr_64:
 ; CHECK:       // %bb.0: // %entry
@@ -155,6 +253,41 @@ entry:
   ret <vscale x 2 x i1> %active.lane.mask.alias
 }
 
+define <vscale x 2 x i1> @whilerw_64(ptr noalias %a, ptr %b, ptr %c, i32 %n) {
+; CHECK-LABEL: whilerw_64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    whilerw p0.d, x2, x1
+; CHECK-NEXT:    ret
+;
+; CHECK-NOSVE2-LABEL: whilerw_64:
+; CHECK-NOSVE2:       // %bb.0: // %entry
+; CHECK-NOSVE2-NEXT:    subs x8, x2, x1
+; CHECK-NOSVE2-NEXT:    cneg x8, x8, mi
+; CHECK-NOSVE2-NEXT:    add x9, x8, #7
+; CHECK-NOSVE2-NEXT:    cmp x8, #0
+; CHECK-NOSVE2-NEXT:    csel x9, x9, x8, lt
+; CHECK-NOSVE2-NEXT:    cmn x8, #7
+; CHECK-NOSVE2-NEXT:    cset w8, lt
+; CHECK-NOSVE2-NEXT:    asr x9, x9, #3
+; CHECK-NOSVE2-NEXT:    sbfx x8, x8, #0, #1
+; CHECK-NOSVE2-NEXT:    whilelo p1.d, xzr, x9
+; CHECK-NOSVE2-NEXT:    whilelo p0.d, xzr, x8
+; CHECK-NOSVE2-NEXT:    mov p0.b, p1/m, p1.b
+; CHECK-NOSVE2-NEXT:    ret
+entry:
+  %b24 = ptrtoint ptr %b to i64
+  %c25 = ptrtoint ptr %c to i64
+  %sub.diff = sub i64 %c25, %b24
+  %0 = tail call i64 @llvm.abs.i64(i64 %sub.diff, i1 false)
+  %diff = sdiv i64 %0, 8
+  %neg.compare = icmp slt i64 %0, -7
+  %.splatinsert = insertelement <vscale x 2 x i1> poison, i1 %neg.compare, i64 0
+  %.splat = shufflevector <vscale x 2 x i1> %.splatinsert, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %ptr.diff.lane.mask = tail call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 %diff)
+  %active.lane.mask.alias = or <vscale x 2 x i1> %ptr.diff.lane.mask, %.splat
+  ret <vscale x 2 x i1> %active.lane.mask.alias
+}
+
 define <vscale x 1 x i1> @no_whilewr_128(ptr noalias %a, ptr %b, ptr %c, i32 %n) {
 ; CHECK-LABEL: no_whilewr_128:
 ; CHECK:       // %bb.0: // %entry
@@ -212,7 +345,7 @@ define void @whilewr_loop_8(ptr noalias %a, ptr %b, ptr %c, i32 %n) {
 ; CHECK-LABEL: whilewr_loop_8:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    cmp w3, #1
-; CHECK-NEXT:    b.lt .LBB6_3
+; CHECK-NEXT:    b.lt .LBB10_3
 ; CHECK-NEXT:  // %bb.1: // %for.body.preheader
 ; CHECK-NEXT:    whilewr p0.b, x1, x2
 ; CHECK-NEXT:    mov w9, w3
@@ -220,7 +353,7 @@ define void @whilewr_loop_8(ptr noalias %a, ptr %b, ptr %c, i32 %n) {
 ; CHECK-NEXT:    whilelo p1.b, xzr, x9
 ; CHECK-NEXT:    cntp x10, p0, p0.b
 ; CHECK-NEXT:    and x10, x10, #0xff
-; CHECK-NEXT:  .LBB6_2: // %vector.body
+; CHECK-NEXT:  .LBB10_2: // %vector.body
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    and p1.b, p1/z, p1.b, p0.b
 ; CHECK-NEXT:    ld1b { z0.b }, p1/z, [x0, x8]
@@ -229,14 +362,14 @@ define void @whilewr_loop_8(ptr noalias %a, ptr %b, ptr %c, i32 %n) {
 ; CHECK-NEXT:    st1b { z0.b }, p1, [x2, x8]
 ; CHECK-NEXT:    add x8, x8, x10
 ; CHECK-NEXT:    whilelo p1.b, x8, x9
-; CHECK-NEXT:    b.mi .LBB6_2
-; CHECK-NEXT:  .LBB6_3: // %for.cond.cleanup
+; CHECK-NEXT:    b.mi .LBB10_2
+; CHECK-NEXT:  .LBB10_3: // %for.cond.cleanup
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-NOSVE2-LABEL: whilewr_loop_8:
 ; CHECK-NOSVE2:       // %bb.0: // %entry
 ; CHECK-NOSVE2-NEXT:    cmp w3, #1
-; CHECK-NOSVE2-NEXT:    b.lt .LBB6_3
+; CHECK-NOSVE2-NEXT:    b.lt .LBB10_3
 ; CHECK-NOSVE2-NEXT:  // %bb.1: // %for.body.preheader
 ; CHECK-NOSVE2-NEXT:    sub x9, x1, x2
 ; CHECK-NOSVE2-NEXT:    mov x8, xzr
@@ -250,7 +383,7 @@ define void @whilewr_loop_8(ptr noalias %a, ptr %b, ptr %c, i32 %n) {
 ; CHECK-NOSVE2-NEXT:    whilelo p1.b, xzr, x9
 ; CHECK-NOSVE2-NEXT:    cntp x10, p0, p0.b
 ; CHECK-NOSVE2-NEXT:    and x10, x10, #0xff
-; CHECK-NOSVE2-NEXT:  .LBB6_2: // %vector.body
+; CHECK-NOSVE2-NEXT:  .LBB10_2: // %vector.body
 ; CHECK-NOSVE2-NEXT:    // =>This Inner Loop Header: Depth=1
 ; CHECK-NOSVE2-NEXT:    and p1.b, p1/z, p1.b, p0.b
 ; CHECK-NOSVE2-NEXT:    ld1b { z0.b }, p1/z, [x0, x8]
@@ -259,8 +392,8 @@ define void @whilewr_loop_8(ptr noalias %a, ptr %b, ptr %c, i32 %n) {
 ; CHECK-NOSVE2-NEXT:    st1b { z0.b }, p1, [x2, x8]
 ; CHECK-NOSVE2-NEXT:    add x8, x8, x10
 ; CHECK-NOSVE2-NEXT:    whilelo p1.b, x8, x9
-; CHECK-NOSVE2-NEXT:    b.mi .LBB6_2
-; CHECK-NOSVE2-NEXT:  .LBB6_3: // %for.cond.cleanup
+; CHECK-NOSVE2-NEXT:    b.mi .LBB10_2
+; CHECK-NOSVE2-NEXT:  .LBB10_3: // %for.cond.cleanup
 ; CHECK-NOSVE2-NEXT:    ret
 entry:
   %cmp11 = icmp sgt i32 %n, 0
@@ -306,14 +439,14 @@ define void @whilewr_loop_16(ptr noalias %a, ptr %b, ptr %c, i32 %n) {
 ; CHECK-LABEL: whilewr_loop_16:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    cmp w3, #1
-; CHECK-NEXT:    b.lt .LBB7_3
+; CHECK-NEXT:    b.lt .LBB11_3
 ; CHECK-NEXT:  // %bb.1: // %for.body.preheader
 ; CHECK-NEXT:    mov w8, w3
 ; CHECK-NEXT:    whilewr p1.h, x1, x2
 ; CHECK-NEXT:    mov x9, xzr
 ; CHECK-NEXT:    whilelo p0.h, xzr, x8
 ; CHECK-NEXT:    and p0.b, p1/z, p1.b, p0.b
-; CHECK-NEXT:  .LBB7_2: // %vector.body
+; CHECK-NEXT:  .LBB11_2: // %vector.body
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0, x9, lsl #1]
 ; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1, x9, lsl #1]
@@ -321,14 +454,14 @@ define void @whilewr_loop_16(ptr noalias %a, ptr %b, ptr %c, i32 %n) {
 ; CHECK-NEXT:    st1h { z0.h }, p0, [x2, x9, lsl #1]
 ; CHECK-NEXT:    inch x9
 ; CHECK-NEXT:    whilelo p0.h, x9, x8
-; CHECK-NEXT:    b.mi .LBB7_2
-; CHECK-NEXT:  .LBB7_3: // %for.cond.cleanup
+; CHECK-NEXT:    b.mi .LBB11_2
+; CHECK-NEXT:  .LBB11_3: // %for.cond.cleanup
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-NOSVE2-LABEL: whilewr_loop_16:
 ; CHECK-NOSVE2:       // %bb.0: // %entry
 ; CHECK-NOSVE2-NEXT:    cmp w3, #1
-; CHECK-NOSVE2-NEXT:    b.lt .LBB7_3
+; CHECK-NOSVE2-NEXT:    b.lt .LBB11_3
 ; CHECK-NOSVE2-NEXT:  // %bb.1: // %for.body.preheader
 ; CHECK-NOSVE2-NEXT:    mov w9, w3
 ; CHECK-NOSVE2-NEXT:    sub x10, x1, x2
@@ -344,7 +477,7 @@ define void @whilewr_loop_16(ptr noalias %a, ptr %b, ptr %c, i32 %n) {
 ; CHECK-NOSVE2-NEXT:    cnth x10
 ; CHECK-NOSVE2-NEXT:    mov p1.b, p2/m, p2.b
 ; CHECK-NOSVE2-NEXT:    and p0.b, p1/z, p1.b, p0.b
-; CHECK-NOSVE2-NEXT:  .LBB7_2: // %vector.body
+; CHECK-NOSVE2-NEXT:  .LBB11_2: // %vector.body
 ; CHECK-NOSVE2-NEXT:    // =>This Inner Loop Header: Depth=1
 ; CHECK-NOSVE2-NEXT:    ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
 ; CHECK-NOSVE2-NEXT:    ld1h { z1.h }, p0/z, [x1, x8, lsl #1]
@@ -352,8 +485,8 @@ define void @whilewr_loop_16(ptr noalias %a, ptr %b, ptr %c, i32 %n) {
 ; CHECK-NOSVE2-NEXT:    st1h { z0.h }, p0, [x2, x8, lsl #1]
 ; CHECK-NOSVE2-NEXT:    add x8, x8, x10
 ; CHECK-NOSVE2-NEXT:    whilelo p0.h, x8, x9
-; CHECK-NOSVE2-NEXT:    b.mi .LBB7_2
-; CHECK-NOSVE2-NEXT:  .LBB7_3: // %for.cond.cleanup
+; CHECK-NOSVE2-NEXT:    b.mi .LBB11_2
+; CHECK-NOSVE2-NEXT:  .LBB11_3: // %for.cond.cleanup
 ; CHECK-NOSVE2-NEXT:    ret
 entry:
   %cmp11 = icmp sgt i32 %n, 0
@@ -399,14 +532,14 @@ define void @whilewr_loop_32(ptr noalias %a, ptr %b, ptr %c, i32 %n) {
 ; CHECK-LABEL: whilewr_loop_32:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    cmp w3, #1
-; CHECK-NEXT:    b.lt .LBB8_3
+; CHECK-NEXT:    b.lt .LBB12_3
 ; CHECK-NEXT:  // %bb.1: // %for.body.preheader
 ; CHECK-NEXT:    mov w8, w3
 ; CHECK-NEXT:    whilewr p1.s, x1, x2
 ; CHECK-NEXT:    mov x9, xzr
 ; CHECK-NEXT:    whilelo p0.s, xzr, x8
 ; CHECK-NEXT:    and p0.b, p1/z, p1.b, p0.b
-; CHECK-NEXT:  .LBB8_2: // %vector.body
+; CHECK-NEXT:  .LBB12_2: // %vector.body
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0, x9, lsl #2]
 ; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1, x9, lsl #2]
@@ -414,14 +547,14 @@ define void @whilewr_loop_32(ptr noalias %a, ptr %b, ptr %c, i32 %n) {
 ; CHECK-NEXT:    st1w { z0.s }, p0, [x2, x9, lsl #2]
 ; CHECK-NEXT:    incw x9
 ; CHECK-NEXT:    whilelo p0.s, x9, x8
-; CHECK-NEXT:    b.mi .LBB8_2
-; CHECK-NEXT:  .LBB8_3: // %for.cond.cleanup
+; CHECK-NEXT:    b.mi .LBB12_2
+; CHECK-NEXT:  .LBB12_3: // %for.cond.cleanup
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-NOSVE2-LABEL: whilewr_loop_32:
 ; CHECK-NOSVE2:       // %bb.0: // %entry
 ; CHECK-NOSVE2-NEXT:    cmp w3, #1
-; CHECK-NOSVE2-NEXT:    b.lt .LBB8_3
+; CHECK-NOSVE2-NEXT:    b.lt .LBB12_3
 ; CHECK-NOSVE2-NEXT:  // %bb.1: // %for.body.preheader
 ; CHECK-NOSVE2-NEXT:    mov w9, w3
 ; CHECK-NOSVE2-NEXT:    sub x10, x1, x2
@@ -439,7 +572,7 @@ define void @whilewr_loop_32(ptr noalias %a, ptr %b, ptr %c, i32 %n) {
 ; CHECK-NOSVE2-NEXT:    cntw x10
 ; CHECK-NOSVE2-NEXT:    mov p1.b, p2/m, p2.b
 ; CHECK-NOSVE2-NEXT:    and p0.b, p1/z, p1.b, p0.b
-; CHECK-NOSVE2-NEXT:  .LBB8_2: // %vector.body
+; CHECK-NOSVE2-NEXT:  .LBB12_2: // %vector.body
 ; CHECK-NOSVE2-NEXT:    // =>This Inner Loop Header: Depth=1
 ; CHECK-NOSVE2-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
 ; CHECK-NOSVE2-NEXT:    ld1w { z1.s }, p0/z, [x1, x8, lsl #2]
@@ -447,8 +580,8 @@ define void @whilewr_loop_32(ptr noalias %a, ptr %b, ptr %c, i32 %n) {
 ; CHECK-NOSVE2-NEXT:    st1w { z0.s }, p0, [x2, x8, lsl #2]
 ; CHECK-NOSVE2-NEXT:    add x8, x8, x10
 ; CHECK-NOSVE2-NEXT:    whilelo p0.s, x8, x9
-; CHECK-NOSVE2-NEXT:    b.mi .LBB8_2
-; CHECK-NOSVE2-NEXT:  .LBB8_3: // %for.cond.cleanup
+; CHECK-NOSVE2-NEXT:    b.mi .LBB12_2
+; CHECK-NOSVE2-NEXT:  .LBB12_3: // %for.cond.cleanup
 ; CHECK-NOSVE2-NEXT:    ret
 entry:
   %cmp9 = icmp sgt i32 %n, 0
@@ -494,14 +627,14 @@ define void @whilewr_loop_64(ptr noalias %a, ptr %b, ptr %c, i32 %n) {
 ; CHECK-LABEL: whilewr_loop_64:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    cmp w3, #1
-; CHECK-NEXT:    b.lt .LBB9_3
+; CHECK-NEXT:    b.lt .LBB13_3
 ; CHECK-NEXT:  // %bb.1: // %for.body.preheader
 ; CHECK-NEXT:    mov w8, w3
 ; CHECK-NEXT:    whilewr p1.d, x1, x2
 ; CHECK-NEXT:    mov x9, xzr
 ; CHECK-NEXT:    whilelo p0.d, xzr, x8
 ; CHECK-NEXT:    and p0.b, p1/z, p1.b, p0.b
-; CHECK-NEXT:  .LBB9_2: // %vector.body
+; CHECK-NEXT:  .LBB13_2: // %vector.body
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, x9, lsl #3]
 ; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1, x9, lsl #3]
@@ -509,14 +642,14 @@ define void @whilewr_loop_64(ptr noalias %a, ptr %b, ptr %c, i32 %n) {
 ; CHECK-NEXT:    st1d { z0.d }, p0, [x2, x9, lsl #3]
 ; CHECK-NEXT:    incd x9
 ; CHECK-NEXT:    whilelo p0.d, x9, x8
-; CHECK-NEXT:    b.mi .LBB9_2
-; CHECK-NEXT:  .LBB9_3: // %for.cond.cleanup
+; CHECK-NEXT:    b.mi .LBB13_2
+; CHECK-NEXT:  .LBB13_3: // %for.cond.cleanup
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-NOSVE2-LABEL: whilewr_loop_64:
 ; CHECK-NOSVE2:       // %bb.0: // %entry
 ; CHECK-NOSVE2-NEXT:    cmp w3, #1
-; CHECK-NOSVE2-NEXT:    b.lt .LBB9_3
+; CHECK-NOSVE2-NEXT:    b.lt .LBB13_3
 ; CHECK-NOSVE2-NEXT:  // %bb.1: // %for.body.preheader
 ; CHECK-NOSVE2-NEXT:    mov w9, w3
 ; CHECK-NOSVE2-NEXT:    sub x10, x1, x2
@@ -534,7 +667,7 @@ define void @whilewr_loop_64(ptr noalias %a, ptr %b, ptr %c, i32 %n) {
 ; CHECK-NOSVE2-NEXT:    cntd x10
 ; CHECK-NOSVE2-NEXT:    mov p1.b, p2/m, p2.b
 ; CHECK-NOSVE2-NEXT:    and p0.b, p1/z, p1.b, p0.b
-; CHECK-NOSVE2-NEXT:  .LBB9_2: // %vector.body
+; CHECK-NOSVE2-NEXT:  .LBB13_2: // %vector.body
 ; CHECK-NOSVE2-NEXT:    // =>This Inner Loop Header: Depth=1
 ; CHECK-NOSVE2-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
 ; CHECK-NOSVE2-NEXT:    ld1d { z1.d }, p0/z, [x1, x8, lsl #3]
@@ -542,8 +675,8 @@ define void @whilewr_loop_64(ptr noalias %a, ptr %b, ptr %c, i32 %n) {
 ; CHECK-NOSVE2-NEXT:    st1d { z0.d }, p0, [x2, x8, lsl #3]
 ; CHECK-NOSVE2-NEXT:    add x8, x8, x10
 ; CHECK-NOSVE2-NEXT:    whilelo p0.d, x8, x9
-; CHECK-NOSVE2-NEXT:    b.mi .LBB9_2
-; CHECK-NOSVE2-NEXT:  .LBB9_3: // %for.cond.cleanup
+; CHECK-NOSVE2-NEXT:    b.mi .LBB13_2
+; CHECK-NOSVE2-NEXT:  .LBB13_3: // %for.cond.cleanup
 ; CHECK-NOSVE2-NEXT:    ret
 entry:
   %cmp9 = icmp sgt i32 %n, 0
@@ -589,7 +722,7 @@ define void @whilewr_loop_multiple_8(ptr %a, ptr %b, ptr %c, i32 %n) {
 ; CHECK-LABEL: whilewr_loop_multiple_8:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    cmp w3, #1
-; CHECK-NEXT:    b.lt .LBB10_3
+; CHECK-NEXT:    b.lt .LBB14_3
 ; CHECK-NEXT:  // %bb.1: // %for.body.preheader
 ; CHECK-NEXT:    whilewr p0.b, x0, x2
 ; CHECK-NEXT:    mov w9, w3
@@ -599,7 +732,7 @@ define void @whilewr_loop_multiple_8(ptr %a, ptr %b, ptr %c, i32 %n) {
 ; CHECK-NEXT:    whilelo p1.b, xzr, x9
 ; CHECK-NEXT:    cntp x10, p0, p0.b
 ; CHECK-NEXT:    and x10, x10, #0xff
-; CHECK-NEXT:  .LBB10_2: // %vector.body
+; CHECK-NEXT:  .LBB14_2: // %vector.body
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    and p1.b, p1/z, p1.b, p0.b
 ; CHECK-NEXT:    ld1b { z0.b }, p1/z, [x0, x8]
@@ -608,14 +741,14 @@ define void @whilewr_loop_multiple_8(ptr %a, ptr %b, ptr %c, i32 %n) {
 ; CHECK-NEXT:    st1b { z0.b }, p1, [x2, x8]
 ; CHECK-NEXT:    add x8, x8, x10
 ; CHECK-NEXT:    whilelo p1.b, x8, x9
-; CHECK-NEXT:    b.mi .LBB10_2
-; CHECK-NEXT:  .LBB10_3: // %for.cond.cleanup
+; CHECK-NEXT:    b.mi .LBB14_2
+; CHECK-NEXT:  .LBB14_3: // %for.cond.cleanup
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-NOSVE2-LABEL: whilewr_loop_multiple_8:
 ; CHECK-NOSVE2:       // %bb.0: // %entry
 ; CHECK-NOSVE2-NEXT:    cmp w3, #1
-; CHECK-NOSVE2-NEXT:    b.lt .LBB10_3
+; CHECK-NOSVE2-NEXT:    b.lt .LBB14_3
 ; CHECK-NOSVE2-NEXT:  // %bb.1: // %for.body.preheader
 ; CHECK-NOSVE2-NEXT:    sub x9, x0, x2
 ; CHECK-NOSVE2-NEXT:    mov x8, xzr
@@ -637,7 +770,7 @@ define void @whilewr_loop_multiple_8(ptr %a, ptr %b, ptr %c, i32 %n) {
 ; CHECK-NOSVE2-NEXT:    whilelo p1.b, xzr, x9
 ; CHECK-NOSVE2-NEXT:    cntp x10, p0, p0.b
 ; CHECK-NOSVE2-NEXT:    and x10, x10, #0xff
-; CHECK-NOSVE2-NEXT:  .LBB10_2: // %vector.body
+; CHECK-NOSVE2-NEXT:  .LBB14_2: // %vector.body
 ; CHECK-NOSVE2-NEXT:    // =>This Inner Loop Header: Depth=1
 ; CHECK-NOSVE2-NEXT:    and p1.b, p1/z, p1.b, p0.b
 ; CHECK-NOSVE2-NEXT:    ld1b { z0.b }, p1/z, [x0, x8]
@@ -646,8 +779,8 @@ define void @whilewr_loop_multiple_8(ptr %a, ptr %b, ptr %c, i32 %n) {
 ; CHECK-NOSVE2-NEXT:    st1b { z0.b }, p1, [x2, x8]
 ; CHECK-NOSVE2-NEXT:    add x8, x8, x10
 ; CHECK-NOSVE2-NEXT:    whilelo p1.b, x8, x9
-; CHECK-NOSVE2-NEXT:    b.mi .LBB10_2
-; CHECK-NOSVE2-NEXT:  .LBB10_3: // %for.cond.cleanup
+; CHECK-NOSVE2-NEXT:    b.mi .LBB14_2
+; CHECK-NOSVE2-NEXT:  .LBB14_3: // %for.cond.cleanup
 ; CHECK-NOSVE2-NEXT:    ret
 entry:
   %cmp11 = icmp sgt i32 %n, 0
@@ -701,7 +834,7 @@ define void @whilewr_loop_multiple_16(ptr %a, ptr %b, ptr %c, i32 %n) {
 ; CHECK-LABEL: whilewr_loop_multiple_16:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    cmp w3, #1
-; CHECK-NEXT:    b.lt .LBB11_3
+; CHECK-NEXT:    b.lt .LBB15_3
 ; CHECK-NEXT:  // %bb.1: // %for.body.preheader
 ; CHECK-NEXT:    whilewr p0.h, x0, x2
 ; CHECK-NEXT:    mov w9, w3
@@ -711,7 +844,7 @@ define void @whilewr_loop_multiple_16(ptr %a, ptr %b, ptr %c, i32 %n) {
 ; CHECK-NEXT:    whilelo p1.h, xzr, x9
 ; CHECK-NEXT:    cntp x10, p0, p0.h
 ; CHECK-NEXT:    and x10, x10, #0xff
-; CHECK-NEXT:  .LBB11_2: // %vector.body
+; CHECK-NEXT:  .LBB15_2: // %vector.body
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    and p1.b, p1/z, p1.b, p0.b
 ; CHECK-NEXT:    ld1h { z0.h }, p1/z, [x0, x8, lsl #1]
@@ -720,14 +853,14 @@ define void @whilewr_loop_multiple_16(ptr %a, ptr %b, ptr %c, i32 %n) {
 ; CHECK-NEXT:    st1h { z0.h }, p1, [x2, x8, lsl #1]
 ; CHECK-NEXT:    add x8, x8, x10
 ; CHECK-NEXT:    whilelo p1.h, x8, x9
-; CHECK-NEXT:    b.mi .LBB11_2
-; CHECK-NEXT:  .LBB11_3: // %for.cond.cleanup
+; CHECK-NEXT:    b.mi .LBB15_2
+; CHECK-NEXT:  .LBB15_3: // %for.cond.cleanup
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-NOSVE2-LABEL: whilewr_loop_multiple_16:
 ; CHECK-NOSVE2:       // %bb.0: // %entry
 ; CHECK-NOSVE2-NEXT:    cmp w3, #1
-; CHECK-NOSVE2-NEXT:    b.lt .LBB11_3
+; CHECK-NOSVE2-NEXT:    b.lt .LBB15_3
 ; CHECK-NOSVE2-NEXT:  // %bb.1: // %for.body.preheader
 ; CHECK-NOSVE2-NEXT:    sub x9, x0, x2
 ; CHECK-NOSVE2-NEXT:    mov x8, xzr
@@ -753,7 +886,7 @@ define void @whilewr_loop_multiple_16(ptr %a, ptr %b, ptr %c, i32 %n) {
 ; CHECK-NOSVE2-NEXT:    whilelo p1.h, xzr, x9
 ; CHECK-NOSVE2-NEXT:    cntp x10, p0, p0.h
 ; CHECK-NOSVE2-NEXT:    and x10, x10, #0xff
-; CHECK-NOSVE2-NEXT:  .LBB11_2: // %vector.body
+; CHECK-NOSVE2-NEXT:  .LBB15_2: // %vector.body
 ; CHECK-NOSVE2-NEXT:    // =>This Inner Loop Header: Depth=1
 ; CHECK-NOSVE2-NEXT:    and p1.b, p1/z, p1.b, p0.b
 ; CHECK-NOSVE2-NEXT:    ld1h { z0.h }, p1/z, [x0, x8, lsl #1]
@@ -762,8 +895,8 @@ define void @whilewr_loop_multiple_16(ptr %a, ptr %b, ptr %c, i32 %n) {
 ; CHECK-NOSVE2-NEXT:    st1h { z0.h }, p1, [x2, x8, lsl #1]
 ; CHECK-NOSVE2-NEXT:    add x8, x8, x10
 ; CHECK-NOSVE2-NEXT:    whilelo p1.h, x8, x9
-; CHECK-NOSVE2-NEXT:    b.mi .LBB11_2
-; CHECK-NOSVE2-NEXT:  .LBB11_3: // %for.cond.cleanup
+; CHECK-NOSVE2-NEXT:    b.mi .LBB15_2
+; CHECK-NOSVE2-NEXT:  .LBB15_3: // %for.cond.cleanup
 ; CHECK-NOSVE2-NEXT:    ret
 entry:
   %cmp11 = icmp sgt i32 %n, 0
@@ -819,7 +952,7 @@ define void @whilewr_loop_multiple_32(ptr %a, ptr %b, ptr %c, i32 %n) {
 ; CHECK-LABEL: whilewr_loop_multiple_32:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    cmp w3, #1
-; CHECK-NEXT:    b.lt .LBB12_3
+; CHECK-NEXT:    b.lt .LBB16_3
 ; CHECK-NEXT:  // %bb.1: // %for.body.preheader
 ; CHECK-NEXT:    whilewr p0.s, x0, x2
 ; CHECK-NEXT:    mov w9, w3
@@ -829,7 +962,7 @@ define void @whilewr_loop_multiple_32(ptr %a, ptr %b, ptr %c, i32 %n) {
 ; CHECK-NEXT:    whilelo p1.s, xzr, x9
 ; CHECK-NEXT:    cntp x10, p0, p0.s
 ; CHECK-NEXT:    and x10, x10, #0xff
-; CHECK-NEXT:  .LBB12_2: // %vector.body
+; CHECK-NEXT:  .LBB16_2: // %vector.body
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    and p1.b, p1/z, p1.b, p0.b
 ; CHECK-NEXT:    ld1w { z0.s }, p1/z, [x0, x8, lsl #2]
@@ -838,14 +971,14 @@ define void @whilewr_loop_multiple_32(ptr %a, ptr %b, ptr %c, i32 %n) {
 ; CHECK-NEXT:    st1w { z0.s }, p1, [x2, x8, lsl #2]
 ; CHECK-NEXT:    add x8, x8, x10
 ; CHECK-NEXT:    whilelo p1.s, x8, x9
-; CHECK-NEXT:    b.mi .LBB12_2
-; CHECK-NEXT:  .LBB12_3: // %for.cond.cleanup
+; CHECK-NEXT:    b.mi .LBB16_2
+; CHECK-NEXT:  .LBB16_3: // %for.cond.cleanup
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-NOSVE2-LABEL: whilewr_loop_multiple_32:
 ; CHECK-NOSVE2:       // %bb.0: // %entry
 ; CHECK-NOSVE2-NEXT:    cmp w3, #1
-; CHECK-NOSVE2-NEXT:    b.lt .LBB12_3
+; CHECK-NOSVE2-NEXT:    b.lt .LBB16_3
 ; CHECK-NOSVE2-NEXT:  // %bb.1: // %for.body.preheader
 ; CHECK-NOSVE2-NEXT:    sub x9, x0, x2
 ; CHECK-NOSVE2-NEXT:    mov x8, xzr
@@ -875,7 +1008,7 @@ define void @whilewr_loop_multiple_32(ptr %a, ptr %b, ptr %c, i32 %n) {
 ; CHECK-NOSVE2-NEXT:    whilelo p1.s, xzr, x9
 ; CHECK-NOSVE2-NEXT:    cntp x10, p0, p0.s
 ; CHECK-NOSVE2-NEXT:    and x10, x10, #0xff
-; CHECK-NOSVE2-NEXT:  .LBB12_2: // %vector.body
+; CHECK-NOSVE2-NEXT:  .LBB16_2: // %vector.body
 ; CHECK-NOSVE2-NEXT:    // =>This Inner Loop Header: Depth=1
 ; CHECK-NOSVE2-NEXT:    and p1.b, p1/z, p1.b, p0.b
 ; CHECK-NOSVE2-NEXT:    ld1w { z0.s }, p1/z, [x0, x8, lsl #2]
@@ -884,8 +1017,8 @@ define void @whilewr_loop_multiple_32(ptr %a, ptr %b, ptr %c, i32 %n) {
 ; CHECK-NOSVE2-NEXT:    st1w { z0.s }, p1, [x2, x8, lsl #2]
 ; CHECK-NOSVE2-NEXT:    add x8, x8, x10
 ; CHECK-NOSVE2-NEXT:    whilelo p1.s, x8, x9
-; CHECK-NOSVE2-NEXT:    b.mi .LBB12_2
-; CHECK-NOSVE2-NEXT:  .LBB12_3: // %for.cond.cleanup
+; CHECK-NOSVE2-NEXT:    b.mi .LBB16_2
+; CHECK-NOSVE2-NEXT:  .LBB16_3: // %for.cond.cleanup
 ; CHECK-NOSVE2-NEXT:    ret
 entry:
   %cmp9 = icmp sgt i32 %n, 0
@@ -941,7 +1074,7 @@ define void @whilewr_loop_multiple_64(ptr %a, ptr %b, ptr %c, i32 %n) {
 ; CHECK-LABEL: whilewr_loop_multiple_64:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    cmp w3, #1
-; CHECK-NEXT:    b.lt .LBB13_3
+; CHECK-NEXT:    b.lt .LBB17_3
 ; CHECK-NEXT:  // %bb.1: // %for.body.preheader
 ; CHECK-NEXT:    whilewr p0.d, x0, x2
 ; CHECK-NEXT:    mov w9, w3
@@ -951,7 +1084,7 @@ define void @whilewr_loop_multiple_64(ptr %a, ptr %b, ptr %c, i32 %n) {
 ; CHECK-NEXT:    whilelo p1.d, xzr, x9
 ; CHECK-NEXT:    cntp x10, p0, p0.d
 ; CHECK-NEXT:    and x10, x10, #0xff
-; CHECK-NEXT:  .LBB13_2: // %vector.body
+; CHECK-NEXT:  .LBB17_2: // %vector.body
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    and p1.b, p1/z, p1.b, p0.b
 ; CHECK-NEXT:    ld1d { z0.d }, p1/z, [x0, x8, lsl #3]
@@ -960,14 +1093,14 @@ define void @whilewr_loop_multiple_64(ptr %a, ptr %b, ptr %c, i32 %n) {
 ; CHECK-NEXT:    st1d { z0.d }, p1, [x2, x8, lsl #3]
 ; CHECK-NEXT:    add x8, x8, x10
 ; CHECK-NEXT:    whilelo p1.d, x8, x9
-; CHECK-NEXT:    b.mi .LBB13_2
-; CHECK-NEXT:  .LBB13_3: // %for.cond.cleanup
+; CHECK-NEXT:    b.mi .LBB17_2
+; CHECK-NEXT:  .LBB17_3: // %for.cond.cleanup
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-NOSVE2-LABEL: whilewr_loop_multiple_64:
 ; CHECK-NOSVE2:       // %bb.0: // %entry
 ; CHECK-NOSVE2-NEXT:    cmp w3, #1
-; CHECK-NOSVE2-NEXT:    b.lt .LBB13_3
+; CHECK-NOSVE2-NEXT:    b.lt .LBB17_3
 ; CHECK-NOSVE2-NEXT:  // %bb.1: // %for.body.preheader
 ; CHECK-NOSVE2-NEXT:    sub x9, x0, x2
 ; CHECK-NOSVE2-NEXT:    mov x8, xzr
@@ -997,7 +1130,7 @@ define void @whilewr_loop_multiple_64(ptr %a, ptr %b, ptr %c, i32 %n) {
 ; CHECK-NOSVE2-NEXT:    whilelo p1.d, xzr, x9
 ; CHECK-NOSVE2-NEXT:    cntp x10, p0, p0.d
 ; CHECK-NOSVE2-NEXT:    and x10, x10, #0xff
-; CHECK-NOSVE2-NEXT:  .LBB13_2: // %vector.body
+; CHECK-NOSVE2-NEXT:  .LBB17_2: // %vector.body
 ; CHECK-NOSVE2-NEXT:    // =>This Inner Loop Header: Depth=1
 ; CHECK-NOSVE2-NEXT:    and p1.b, p1/z, p1.b, p0.b
 ; CHECK-NOSVE2-NEXT:    ld1d { z0.d }, p1/z, [x0, x8, lsl #3]
@@ -1006,8 +1139,8 @@ define void @whilewr_loop_multiple_64(ptr %a, ptr %b, ptr %c, i32 %n) {
 ; CHECK-NOSVE2-NEXT:    st1d { z0.d }, p1, [x2, x8, lsl #3]
 ; CHECK-NOSVE2-NEXT:    add x8, x8, x10
 ; CHECK-NOSVE2-NEXT:    whilelo p1.d, x8, x9
-; CHECK-NOSVE2-NEXT:    b.mi .LBB13_2
-; CHECK-NOSVE2-NEXT:  .LBB13_3: // %for.cond.cleanup
+; CHECK-NOSVE2-NEXT:    b.mi .LBB17_2
+; CHECK-NOSVE2-NEXT:  .LBB17_3: // %for.cond.cleanup
 ; CHECK-NOSVE2-NEXT:    ret
 entry:
   %cmp9 = icmp sgt i32 %n, 0



More information about the llvm-commits mailing list