[llvm] [AArch64] Fix cttz.elts codegen for fixed-length vectors (PR #178902)

via llvm-commits llvm-commits at lists.llvm.org
Fri Jan 30 07:17:40 PST 2026


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-aarch64

Author: Graham Hunter (huntergr-arm)

<details>
<summary>Changes</summary>

When SVE is available, cttz.elts for fixed-length vectors is lowered using
scalable container types for the predicates, since NEON doesn't have
dedicated predicate registers. Unfortunately, this also discards the actual
number of elements to examine when the fixed-length vector is shorter than
a full SVE vector.
Example codegen for an llvm.experimental.cttz.elts.i64.v4i1:

  shl v0.4h, v0.4h, #15
  ptrue p0.h, vl4
  ptrue p1.h
  cmpne p0.h, p0/z, z0.h, #0
  brkb p0.b, p1/z, p0.b
  cntp x8, p0, p0.h

The 'ptrue p1.h' is where we went wrong -- if p0 (the cmpne result) has no
active lanes, we should set at most 4 lanes active, but since brkb's
governing predicate (pg) operand is all active, it sets all available lanes
(e.g. 8 .h lanes on a 128-bit SVE implementation).

We need to use the 'vl4' mask for brkb's pg operand instead, so I've added
an extra operand to AArch64ISD::CTTZ_ELTS that carries the appropriate
predicate pattern from the point where the intrinsic is lowered through to
ISel.

Fixes #178644


---
Full diff: https://github.com/llvm/llvm-project/pull/178902.diff


4 Files Affected:

- (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+6-1) 
- (modified) llvm/lib/Target/AArch64/AArch64InstrInfo.td (+2-2) 
- (modified) llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td (+28-28) 
- (modified) llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll (+36-54) 


``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 2775ddcff353c..d479dc7de6abf 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -6903,16 +6903,21 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
     EVT VT = CttzOp.getValueType();
     assert(VT.getVectorElementType() == MVT::i1 && "Expected MVT::i1");
 
+    // Default to all for scalable vectors
+    unsigned PgPattern = AArch64SVEPredPattern::all;
     if (VT.isFixedLengthVector()) {
       // We can use SVE instructions to lower this intrinsic by first creating
       // an SVE predicate register mask from the fixed-width vector.
       EVT NewVT = getTypeToTransformTo(*DAG.getContext(), VT);
       SDValue Mask = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, CttzOp);
       CttzOp = convertFixedMaskToScalableVector(Mask, DAG);
+      // Override with a VLx.
+      PgPattern = *getSVEPredPatternFromNumElements(VT.getVectorNumElements());
     }
 
+    SDValue Pattern = DAG.getTargetConstant(PgPattern, DL, MVT::i32);
     SDValue NewCttzElts =
-        DAG.getNode(AArch64ISD::CTTZ_ELTS, DL, MVT::i64, CttzOp);
+        DAG.getNode(AArch64ISD::CTTZ_ELTS, DL, MVT::i64, CttzOp, Pattern);
     return DAG.getZExtOrTrunc(NewCttzElts, DL, Op.getValueType());
   }
   case Intrinsic::experimental_vector_match: {
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index cefec898f4dc7..96c64727d9c24 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -1224,8 +1224,8 @@ def AArch64rshrnb_pf : PatFrags<(ops node:$rs, node:$i),
                             [(AArch64rshrnb node:$rs, node:$i),
                             (int_aarch64_sve_rshrnb node:$rs, node:$i)]>;
 
-def AArch64CttzElts : SDNode<"AArch64ISD::CTTZ_ELTS", SDTypeProfile<1, 1,
-                             [SDTCisInt<0>, SDTCisVec<1>]>, []>;
+def AArch64CttzElts : SDNode<"AArch64ISD::CTTZ_ELTS", SDTypeProfile<1, 2,
+                             [SDTCisInt<0>, SDTCisVec<1>, SDTCisInt<2>]>, []>;
 
 // NEON Load/Store with post-increment base updates.
 // TODO: Complete SDTypeProfile constraints.
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index dd90bf2622ec3..36a4b553f22c0 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -2273,21 +2273,21 @@ let Predicates = [HasSVE_or_SME] in {
   defm CNTD_XPiI : sve_int_count<0b110, "cntd", int_aarch64_sve_cntd>;
   defm CNTP_XPP : sve_int_pcount_pred<0b000, "cntp", int_aarch64_sve_cntp>;
 
-  def : Pat<(i64 (AArch64CttzElts nxv16i1:$Op1)),
-            (CNTP_XPP_B (BRKB_PPzP (PTRUE_B 31), PPR:$Op1),
-                        (BRKB_PPzP (PTRUE_B 31), PPR:$Op1))>;
+  def : Pat<(i64 (AArch64CttzElts nxv16i1:$Op1, sve_pred_enum:$Pat)),
+            (CNTP_XPP_B (BRKB_PPzP (PTRUE_B $Pat), PPR:$Op1),
+                        (BRKB_PPzP (PTRUE_B $Pat), PPR:$Op1))>;
 
-  def : Pat<(i64 (AArch64CttzElts nxv8i1:$Op1)),
-            (CNTP_XPP_H (BRKB_PPzP (PTRUE_H 31), PPR:$Op1),
-                        (BRKB_PPzP (PTRUE_H 31), PPR:$Op1))>;
+  def : Pat<(i64 (AArch64CttzElts nxv8i1:$Op1, sve_pred_enum:$Pat)),
+            (CNTP_XPP_H (BRKB_PPzP (PTRUE_H $Pat), PPR:$Op1),
+                        (BRKB_PPzP (PTRUE_H $Pat), PPR:$Op1))>;
 
-  def : Pat<(i64 (AArch64CttzElts nxv4i1:$Op1)),
-            (CNTP_XPP_S (BRKB_PPzP (PTRUE_S 31), PPR:$Op1),
-                        (BRKB_PPzP (PTRUE_S 31), PPR:$Op1))>;
+  def : Pat<(i64 (AArch64CttzElts nxv4i1:$Op1, sve_pred_enum:$Pat)),
+            (CNTP_XPP_S (BRKB_PPzP (PTRUE_S $Pat), PPR:$Op1),
+                        (BRKB_PPzP (PTRUE_S $Pat), PPR:$Op1))>;
 
-  def : Pat<(i64 (AArch64CttzElts nxv2i1:$Op1)),
-            (CNTP_XPP_D (BRKB_PPzP (PTRUE_D 31), PPR:$Op1),
-                        (BRKB_PPzP (PTRUE_D 31), PPR:$Op1))>;
+  def : Pat<(i64 (AArch64CttzElts nxv2i1:$Op1, sve_pred_enum:$Pat)),
+            (CNTP_XPP_D (BRKB_PPzP (PTRUE_D $Pat), PPR:$Op1),
+                        (BRKB_PPzP (PTRUE_D $Pat), PPR:$Op1))>;
 }
 
   defm INCB_XPiI : sve_int_pred_pattern_a<0b000, "incb", add, int_aarch64_sve_cntb>;
@@ -2373,35 +2373,35 @@ let Predicates = [HasSVE_or_SME] in {
   defm INCP_ZP     : sve_int_count_v<0b10000, "incp">;
   defm DECP_ZP     : sve_int_count_v<0b10100, "decp">;
 
-  def : Pat<(i64 (add GPR64:$Op1, (i64 (AArch64CttzElts nxv16i1:$Op2)))),
-            (INCP_XP_B (BRKB_PPzP (PTRUE_B 31), PPR:$Op2), GPR64:$Op1)>;
+  def : Pat<(i64 (add GPR64:$Op1, (i64 (AArch64CttzElts nxv16i1:$Op2, sve_pred_enum:$Pat)))),
+            (INCP_XP_B (BRKB_PPzP (PTRUE_B $Pat), PPR:$Op2), GPR64:$Op1)>;
 
-  def : Pat<(i32 (add GPR32:$Op1, (trunc (i64 (AArch64CttzElts nxv16i1:$Op2))))),
-            (EXTRACT_SUBREG (INCP_XP_B (BRKB_PPzP (PTRUE_B 31), PPR:$Op2),
+  def : Pat<(i32 (add GPR32:$Op1, (trunc (i64 (AArch64CttzElts nxv16i1:$Op2, sve_pred_enum:$Pat))))),
+            (EXTRACT_SUBREG (INCP_XP_B (BRKB_PPzP (PTRUE_B $Pat), PPR:$Op2),
                                        (INSERT_SUBREG (IMPLICIT_DEF), GPR32:$Op1, sub_32)),
                             sub_32)>;
 
-  def : Pat<(i64 (add GPR64:$Op1, (i64 (AArch64CttzElts nxv8i1:$Op2)))),
-            (INCP_XP_H (BRKB_PPzP (PTRUE_H 31), PPR:$Op2), GPR64:$Op1)>;
+  def : Pat<(i64 (add GPR64:$Op1, (i64 (AArch64CttzElts nxv8i1:$Op2, sve_pred_enum:$Pat)))),
+            (INCP_XP_H (BRKB_PPzP (PTRUE_H $Pat), PPR:$Op2), GPR64:$Op1)>;
 
-  def : Pat<(i32 (add GPR32:$Op1, (trunc (i64 (AArch64CttzElts nxv8i1:$Op2))))),
-            (EXTRACT_SUBREG (INCP_XP_H (BRKB_PPzP (PTRUE_H 31), PPR:$Op2),
+  def : Pat<(i32 (add GPR32:$Op1, (trunc (i64 (AArch64CttzElts nxv8i1:$Op2, sve_pred_enum:$Pat))))),
+            (EXTRACT_SUBREG (INCP_XP_H (BRKB_PPzP (PTRUE_H $Pat), PPR:$Op2),
                                        (INSERT_SUBREG (IMPLICIT_DEF), GPR32:$Op1, sub_32)),
                             sub_32)>;
 
-  def : Pat<(i64 (add GPR64:$Op1, (i64 (AArch64CttzElts nxv4i1:$Op2)))),
-            (INCP_XP_S (BRKB_PPzP (PTRUE_S 31), PPR:$Op2), GPR64:$Op1)>;
+  def : Pat<(i64 (add GPR64:$Op1, (i64 (AArch64CttzElts nxv4i1:$Op2, sve_pred_enum:$Pat)))),
+            (INCP_XP_S (BRKB_PPzP (PTRUE_S $Pat), PPR:$Op2), GPR64:$Op1)>;
 
-  def : Pat<(i32 (add GPR32:$Op1, (trunc (i64 (AArch64CttzElts nxv4i1:$Op2))))),
-            (EXTRACT_SUBREG (INCP_XP_S (BRKB_PPzP (PTRUE_S 31), PPR:$Op2),
+  def : Pat<(i32 (add GPR32:$Op1, (trunc (i64 (AArch64CttzElts nxv4i1:$Op2, sve_pred_enum:$Pat))))),
+            (EXTRACT_SUBREG (INCP_XP_S (BRKB_PPzP (PTRUE_S $Pat), PPR:$Op2),
                                        (INSERT_SUBREG (IMPLICIT_DEF), GPR32:$Op1, sub_32)),
                             sub_32)>;
 
-  def : Pat<(i64 (add GPR64:$Op1, (i64 (AArch64CttzElts nxv2i1:$Op2)))),
-            (INCP_XP_D (BRKB_PPzP (PTRUE_D 31), PPR:$Op2), GPR64:$Op1)>;
+  def : Pat<(i64 (add GPR64:$Op1, (i64 (AArch64CttzElts nxv2i1:$Op2, sve_pred_enum:$Pat)))),
+            (INCP_XP_D (BRKB_PPzP (PTRUE_D $Pat), PPR:$Op2), GPR64:$Op1)>;
 
-  def : Pat<(i32 (add GPR32:$Op1, (trunc (i64 (AArch64CttzElts nxv2i1:$Op2))))),
-            (EXTRACT_SUBREG (INCP_XP_D (BRKB_PPzP (PTRUE_D 31), PPR:$Op2),
+  def : Pat<(i32 (add GPR32:$Op1, (trunc (i64 (AArch64CttzElts nxv2i1:$Op2, sve_pred_enum:$Pat))))),
+            (EXTRACT_SUBREG (INCP_XP_D (BRKB_PPzP (PTRUE_D $Pat), PPR:$Op2),
                                        (INSERT_SUBREG (IMPLICIT_DEF), GPR32:$Op1, sub_32)),
                             sub_32)>;
 
diff --git a/llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll b/llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll
index 49a0086a7be54..33e7c69f041d4 100644
--- a/llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll
+++ b/llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll
@@ -349,9 +349,8 @@ define i32 @ctz_v16i1(<16 x i1> %a) {
 ; NONSTREAMING:       // %bb.0:
 ; NONSTREAMING-NEXT:    shl v0.16b, v0.16b, #7
 ; NONSTREAMING-NEXT:    ptrue p0.b, vl16
-; NONSTREAMING-NEXT:    ptrue p1.b
-; NONSTREAMING-NEXT:    cmpne p0.b, p0/z, z0.b, #0
-; NONSTREAMING-NEXT:    brkb p0.b, p1/z, p0.b
+; NONSTREAMING-NEXT:    cmpne p1.b, p0/z, z0.b, #0
+; NONSTREAMING-NEXT:    brkb p0.b, p0/z, p1.b
 ; NONSTREAMING-NEXT:    cntp x0, p0, p0.b
 ; NONSTREAMING-NEXT:    ret
 ;
@@ -359,10 +358,9 @@ define i32 @ctz_v16i1(<16 x i1> %a) {
 ; STREAMING:       // %bb.0:
 ; STREAMING-NEXT:    lsl z0.b, z0.b, #7
 ; STREAMING-NEXT:    ptrue p0.b, vl16
-; STREAMING-NEXT:    ptrue p1.b
 ; STREAMING-NEXT:    asr z0.b, z0.b, #7
-; STREAMING-NEXT:    cmpne p0.b, p0/z, z0.b, #0
-; STREAMING-NEXT:    brkb p0.b, p1/z, p0.b
+; STREAMING-NEXT:    cmpne p1.b, p0/z, z0.b, #0
+; STREAMING-NEXT:    brkb p0.b, p0/z, p1.b
 ; STREAMING-NEXT:    cntp x0, p0, p0.b
 ; STREAMING-NEXT:    ret
   %res = call i32 @llvm.experimental.cttz.elts.i32.v16i1(<16 x i1> %a, i1 0)
@@ -374,9 +372,8 @@ define i32 @ctz_v16i1_poison(<16 x i1> %a) {
 ; NONSTREAMING:       // %bb.0:
 ; NONSTREAMING-NEXT:    shl v0.16b, v0.16b, #7
 ; NONSTREAMING-NEXT:    ptrue p0.b, vl16
-; NONSTREAMING-NEXT:    ptrue p1.b
-; NONSTREAMING-NEXT:    cmpne p0.b, p0/z, z0.b, #0
-; NONSTREAMING-NEXT:    brkb p0.b, p1/z, p0.b
+; NONSTREAMING-NEXT:    cmpne p1.b, p0/z, z0.b, #0
+; NONSTREAMING-NEXT:    brkb p0.b, p0/z, p1.b
 ; NONSTREAMING-NEXT:    cntp x0, p0, p0.b
 ; NONSTREAMING-NEXT:    ret
 ;
@@ -384,10 +381,9 @@ define i32 @ctz_v16i1_poison(<16 x i1> %a) {
 ; STREAMING:       // %bb.0:
 ; STREAMING-NEXT:    lsl z0.b, z0.b, #7
 ; STREAMING-NEXT:    ptrue p0.b, vl16
-; STREAMING-NEXT:    ptrue p1.b
 ; STREAMING-NEXT:    asr z0.b, z0.b, #7
-; STREAMING-NEXT:    cmpne p0.b, p0/z, z0.b, #0
-; STREAMING-NEXT:    brkb p0.b, p1/z, p0.b
+; STREAMING-NEXT:    cmpne p1.b, p0/z, z0.b, #0
+; STREAMING-NEXT:    brkb p0.b, p0/z, p1.b
 ; STREAMING-NEXT:    cntp x0, p0, p0.b
 ; STREAMING-NEXT:    ret
   %res = call i32 @llvm.experimental.cttz.elts.i32.v16i1(<16 x i1> %a, i1 1)
@@ -399,9 +395,8 @@ define i64 @add_i64_ctz_v16i1_poison(<16 x i1> %a, i64 %b) {
 ; NONSTREAMING:       // %bb.0:
 ; NONSTREAMING-NEXT:    shl v0.16b, v0.16b, #7
 ; NONSTREAMING-NEXT:    ptrue p0.b, vl16
-; NONSTREAMING-NEXT:    ptrue p1.b
-; NONSTREAMING-NEXT:    cmpne p0.b, p0/z, z0.b, #0
-; NONSTREAMING-NEXT:    brkb p0.b, p1/z, p0.b
+; NONSTREAMING-NEXT:    cmpne p1.b, p0/z, z0.b, #0
+; NONSTREAMING-NEXT:    brkb p0.b, p0/z, p1.b
 ; NONSTREAMING-NEXT:    incp x0, p0.b
 ; NONSTREAMING-NEXT:    ret
 ;
@@ -409,10 +404,9 @@ define i64 @add_i64_ctz_v16i1_poison(<16 x i1> %a, i64 %b) {
 ; STREAMING:       // %bb.0:
 ; STREAMING-NEXT:    lsl z0.b, z0.b, #7
 ; STREAMING-NEXT:    ptrue p0.b, vl16
-; STREAMING-NEXT:    ptrue p1.b
 ; STREAMING-NEXT:    asr z0.b, z0.b, #7
-; STREAMING-NEXT:    cmpne p0.b, p0/z, z0.b, #0
-; STREAMING-NEXT:    brkb p0.b, p1/z, p0.b
+; STREAMING-NEXT:    cmpne p1.b, p0/z, z0.b, #0
+; STREAMING-NEXT:    brkb p0.b, p0/z, p1.b
 ; STREAMING-NEXT:    incp x0, p0.b
 ; STREAMING-NEXT:    ret
   %res = call i64 @llvm.experimental.cttz.elts.i64.v16i1(<16 x i1> %a, i1 1)
@@ -425,9 +419,8 @@ define i32 @ctz_v8i1(<8 x i1> %a) {
 ; NONSTREAMING:       // %bb.0:
 ; NONSTREAMING-NEXT:    shl v0.8b, v0.8b, #7
 ; NONSTREAMING-NEXT:    ptrue p0.b, vl8
-; NONSTREAMING-NEXT:    ptrue p1.b
-; NONSTREAMING-NEXT:    cmpne p0.b, p0/z, z0.b, #0
-; NONSTREAMING-NEXT:    brkb p0.b, p1/z, p0.b
+; NONSTREAMING-NEXT:    cmpne p1.b, p0/z, z0.b, #0
+; NONSTREAMING-NEXT:    brkb p0.b, p0/z, p1.b
 ; NONSTREAMING-NEXT:    cntp x0, p0, p0.b
 ; NONSTREAMING-NEXT:    ret
 ;
@@ -435,10 +428,9 @@ define i32 @ctz_v8i1(<8 x i1> %a) {
 ; STREAMING:       // %bb.0:
 ; STREAMING-NEXT:    lsl z0.b, z0.b, #7
 ; STREAMING-NEXT:    ptrue p0.b, vl8
-; STREAMING-NEXT:    ptrue p1.b
 ; STREAMING-NEXT:    asr z0.b, z0.b, #7
-; STREAMING-NEXT:    cmpne p0.b, p0/z, z0.b, #0
-; STREAMING-NEXT:    brkb p0.b, p1/z, p0.b
+; STREAMING-NEXT:    cmpne p1.b, p0/z, z0.b, #0
+; STREAMING-NEXT:    brkb p0.b, p0/z, p1.b
 ; STREAMING-NEXT:    cntp x0, p0, p0.b
 ; STREAMING-NEXT:    ret
   %res = call i32 @llvm.experimental.cttz.elts.i32.v8i1(<8 x i1> %a, i1 0)
@@ -450,9 +442,8 @@ define i32 @ctz_v8i1_poison(<8 x i1> %a) {
 ; NONSTREAMING:       // %bb.0:
 ; NONSTREAMING-NEXT:    shl v0.8b, v0.8b, #7
 ; NONSTREAMING-NEXT:    ptrue p0.b, vl8
-; NONSTREAMING-NEXT:    ptrue p1.b
-; NONSTREAMING-NEXT:    cmpne p0.b, p0/z, z0.b, #0
-; NONSTREAMING-NEXT:    brkb p0.b, p1/z, p0.b
+; NONSTREAMING-NEXT:    cmpne p1.b, p0/z, z0.b, #0
+; NONSTREAMING-NEXT:    brkb p0.b, p0/z, p1.b
 ; NONSTREAMING-NEXT:    cntp x0, p0, p0.b
 ; NONSTREAMING-NEXT:    ret
 ;
@@ -460,10 +451,9 @@ define i32 @ctz_v8i1_poison(<8 x i1> %a) {
 ; STREAMING:       // %bb.0:
 ; STREAMING-NEXT:    lsl z0.b, z0.b, #7
 ; STREAMING-NEXT:    ptrue p0.b, vl8
-; STREAMING-NEXT:    ptrue p1.b
 ; STREAMING-NEXT:    asr z0.b, z0.b, #7
-; STREAMING-NEXT:    cmpne p0.b, p0/z, z0.b, #0
-; STREAMING-NEXT:    brkb p0.b, p1/z, p0.b
+; STREAMING-NEXT:    cmpne p1.b, p0/z, z0.b, #0
+; STREAMING-NEXT:    brkb p0.b, p0/z, p1.b
 ; STREAMING-NEXT:    cntp x0, p0, p0.b
 ; STREAMING-NEXT:    ret
   %res = call i32 @llvm.experimental.cttz.elts.i32.v8i1(<8 x i1> %a, i1 1)
@@ -475,9 +465,8 @@ define i32 @ctz_v4i1(<4 x i1> %a) {
 ; NONSTREAMING:       // %bb.0:
 ; NONSTREAMING-NEXT:    shl v0.4h, v0.4h, #15
 ; NONSTREAMING-NEXT:    ptrue p0.h, vl4
-; NONSTREAMING-NEXT:    ptrue p1.h
-; NONSTREAMING-NEXT:    cmpne p0.h, p0/z, z0.h, #0
-; NONSTREAMING-NEXT:    brkb p0.b, p1/z, p0.b
+; NONSTREAMING-NEXT:    cmpne p1.h, p0/z, z0.h, #0
+; NONSTREAMING-NEXT:    brkb p0.b, p0/z, p1.b
 ; NONSTREAMING-NEXT:    cntp x0, p0, p0.h
 ; NONSTREAMING-NEXT:    ret
 ;
@@ -485,10 +474,9 @@ define i32 @ctz_v4i1(<4 x i1> %a) {
 ; STREAMING:       // %bb.0:
 ; STREAMING-NEXT:    lsl z0.h, z0.h, #15
 ; STREAMING-NEXT:    ptrue p0.h, vl4
-; STREAMING-NEXT:    ptrue p1.h
 ; STREAMING-NEXT:    asr z0.h, z0.h, #15
-; STREAMING-NEXT:    cmpne p0.h, p0/z, z0.h, #0
-; STREAMING-NEXT:    brkb p0.b, p1/z, p0.b
+; STREAMING-NEXT:    cmpne p1.h, p0/z, z0.h, #0
+; STREAMING-NEXT:    brkb p0.b, p0/z, p1.b
 ; STREAMING-NEXT:    cntp x0, p0, p0.h
 ; STREAMING-NEXT:    ret
   %res = call i32 @llvm.experimental.cttz.elts.i32.v4i1(<4 x i1> %a, i1 0)
@@ -500,9 +488,8 @@ define i32 @ctz_v4i1_poison(<4 x i1> %a) {
 ; NONSTREAMING:       // %bb.0:
 ; NONSTREAMING-NEXT:    shl v0.4h, v0.4h, #15
 ; NONSTREAMING-NEXT:    ptrue p0.h, vl4
-; NONSTREAMING-NEXT:    ptrue p1.h
-; NONSTREAMING-NEXT:    cmpne p0.h, p0/z, z0.h, #0
-; NONSTREAMING-NEXT:    brkb p0.b, p1/z, p0.b
+; NONSTREAMING-NEXT:    cmpne p1.h, p0/z, z0.h, #0
+; NONSTREAMING-NEXT:    brkb p0.b, p0/z, p1.b
 ; NONSTREAMING-NEXT:    cntp x0, p0, p0.h
 ; NONSTREAMING-NEXT:    ret
 ;
@@ -510,10 +497,9 @@ define i32 @ctz_v4i1_poison(<4 x i1> %a) {
 ; STREAMING:       // %bb.0:
 ; STREAMING-NEXT:    lsl z0.h, z0.h, #15
 ; STREAMING-NEXT:    ptrue p0.h, vl4
-; STREAMING-NEXT:    ptrue p1.h
 ; STREAMING-NEXT:    asr z0.h, z0.h, #15
-; STREAMING-NEXT:    cmpne p0.h, p0/z, z0.h, #0
-; STREAMING-NEXT:    brkb p0.b, p1/z, p0.b
+; STREAMING-NEXT:    cmpne p1.h, p0/z, z0.h, #0
+; STREAMING-NEXT:    brkb p0.b, p0/z, p1.b
 ; STREAMING-NEXT:    cntp x0, p0, p0.h
 ; STREAMING-NEXT:    ret
   %res = call i32 @llvm.experimental.cttz.elts.i32.v4i1(<4 x i1> %a, i1 1)
@@ -525,9 +511,8 @@ define i32 @ctz_v2i1(<2 x i1> %a) {
 ; NONSTREAMING:       // %bb.0:
 ; NONSTREAMING-NEXT:    shl v0.2s, v0.2s, #31
 ; NONSTREAMING-NEXT:    ptrue p0.s, vl2
-; NONSTREAMING-NEXT:    ptrue p1.s
-; NONSTREAMING-NEXT:    cmpne p0.s, p0/z, z0.s, #0
-; NONSTREAMING-NEXT:    brkb p0.b, p1/z, p0.b
+; NONSTREAMING-NEXT:    cmpne p1.s, p0/z, z0.s, #0
+; NONSTREAMING-NEXT:    brkb p0.b, p0/z, p1.b
 ; NONSTREAMING-NEXT:    cntp x0, p0, p0.s
 ; NONSTREAMING-NEXT:    ret
 ;
@@ -535,10 +520,9 @@ define i32 @ctz_v2i1(<2 x i1> %a) {
 ; STREAMING:       // %bb.0:
 ; STREAMING-NEXT:    lsl z0.s, z0.s, #31
 ; STREAMING-NEXT:    ptrue p0.s, vl2
-; STREAMING-NEXT:    ptrue p1.s
 ; STREAMING-NEXT:    asr z0.s, z0.s, #31
-; STREAMING-NEXT:    cmpne p0.s, p0/z, z0.s, #0
-; STREAMING-NEXT:    brkb p0.b, p1/z, p0.b
+; STREAMING-NEXT:    cmpne p1.s, p0/z, z0.s, #0
+; STREAMING-NEXT:    brkb p0.b, p0/z, p1.b
 ; STREAMING-NEXT:    cntp x0, p0, p0.s
 ; STREAMING-NEXT:    ret
   %res = call i32 @llvm.experimental.cttz.elts.i32.v2i1(<2 x i1> %a, i1 0)
@@ -550,9 +534,8 @@ define i32 @ctz_v2i1_poison(<2 x i1> %a) {
 ; NONSTREAMING:       // %bb.0:
 ; NONSTREAMING-NEXT:    shl v0.2s, v0.2s, #31
 ; NONSTREAMING-NEXT:    ptrue p0.s, vl2
-; NONSTREAMING-NEXT:    ptrue p1.s
-; NONSTREAMING-NEXT:    cmpne p0.s, p0/z, z0.s, #0
-; NONSTREAMING-NEXT:    brkb p0.b, p1/z, p0.b
+; NONSTREAMING-NEXT:    cmpne p1.s, p0/z, z0.s, #0
+; NONSTREAMING-NEXT:    brkb p0.b, p0/z, p1.b
 ; NONSTREAMING-NEXT:    cntp x0, p0, p0.s
 ; NONSTREAMING-NEXT:    ret
 ;
@@ -560,10 +543,9 @@ define i32 @ctz_v2i1_poison(<2 x i1> %a) {
 ; STREAMING:       // %bb.0:
 ; STREAMING-NEXT:    lsl z0.s, z0.s, #31
 ; STREAMING-NEXT:    ptrue p0.s, vl2
-; STREAMING-NEXT:    ptrue p1.s
 ; STREAMING-NEXT:    asr z0.s, z0.s, #31
-; STREAMING-NEXT:    cmpne p0.s, p0/z, z0.s, #0
-; STREAMING-NEXT:    brkb p0.b, p1/z, p0.b
+; STREAMING-NEXT:    cmpne p1.s, p0/z, z0.s, #0
+; STREAMING-NEXT:    brkb p0.b, p0/z, p1.b
 ; STREAMING-NEXT:    cntp x0, p0, p0.s
 ; STREAMING-NEXT:    ret
   %res = call i32 @llvm.experimental.cttz.elts.i32.v2i1(<2 x i1> %a, i1 1)

``````````

</details>


https://github.com/llvm/llvm-project/pull/178902


More information about the llvm-commits mailing list