[llvm] 8699efb - [AArch64][SVE] Fix bad PTEST(PTRUE_ALL, PTEST_LIKE) optimization

Cullen Rhodes via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 15 04:43:33 PST 2022


Author: Cullen Rhodes
Date: 2022-11-15T12:43:21Z
New Revision: 8699efba6dc8d905d144727eb10ff6782376bb87

URL: https://github.com/llvm/llvm-project/commit/8699efba6dc8d905d144727eb10ff6782376bb87
DIFF: https://github.com/llvm/llvm-project/commit/8699efba6dc8d905d144727eb10ff6782376bb87.diff

LOG: [AArch64][SVE] Fix bad PTEST(PTRUE_ALL, PTEST_LIKE) optimization

AArch64InstrInfo::optimizePTestInstr attempts to remove a PTEST of a
predicate generating operation that identically sets flags (implictly).

When the mask is an all active of matching element size the PTEST is
currently removed. For while instructions this is correct since they
perform an implicit PTEST with an all active mask. However, for other
instructions such as compares the mask could be different.

This patch fixes this bug by only removing the PTEST if the same all
active mask is used by the predicating-generating instruction.

Reviewed By: bsmith

Differential Revision: https://reviews.llvm.org/D137718

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
    llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.ll
    llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.mir
    llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpge.ll
    llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpgt.ll
    llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphi.ll
    llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphs.ll
    llvm/test/CodeGen/AArch64/sve-ptest-removal-cmple.ll
    llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplo.ll
    llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpls.ll
    llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplt.ll
    llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpne.ll
    llvm/test/CodeGen/AArch64/sve-ptest-removal-match.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index b802358bcd95..35a4ea9d142b 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -1308,8 +1308,9 @@ bool AArch64InstrInfo::optimizePTestInstr(
   bool PredIsWhileLike = isWhileOpcode(PredOpcode);
 
   if (isPTrueOpcode(MaskOpcode) && (PredIsPTestLike || PredIsWhileLike)) {
-    // For PTEST(PTRUE, OTHER_INST), PTEST is redundant when PTRUE doesn't
-    // deactivate any lanes OTHER_INST might set.
+    // For PTEST(PTRUE_ALL, WHILE), if the element size matches the PTEST is
+    // redundant since WHILE performs an implicit PTEST with an all active
+    // mask.
     uint64_t MaskElementSize = getElementSizeForOpcode(MaskOpcode);
     uint64_t PredElementSize = getElementSizeForOpcode(PredOpcode);
 
@@ -1318,6 +1319,15 @@ bool AArch64InstrInfo::optimizePTestInstr(
         (Mask->getOperand(1).getImm() != 31))
       return false;
 
+    // For PTEST(PTRUE_ALL, PTEST_LIKE), the PTEST is redundant if the
+    // PTEST_LIKE instruction uses the same all active mask and the element
+    // size matches.
+    if (PredIsPTestLike) {
+      auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
+      if (Mask != PTestLikeMask)
+        return false;
+    }
+
     // Fallthough to simply remove the PTEST.
   } else if (PredIsPTestLike) {
     // For PTEST(PG, PTEST_LIKE(PG, ...)), the PTEST is redundant since the

diff  --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.ll
index 2eae4f324ee2..2e9c5704bc31 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.ll
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.ll
@@ -37,7 +37,9 @@ define i32 @cmpeq_nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale
 define i32 @cmpeq_imm_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
 ; CHECK-LABEL: cmpeq_imm_nxv16i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p1.b
 ; CHECK-NEXT:    cmpeq p0.b, p0/z, z0.b, #0
+; CHECK-NEXT:    ptest p1, p0.b
 ; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret
   %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer)

diff  --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.mir
index 48ef72c9b389..81318aa5c2a5 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.mir
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.mir
@@ -170,7 +170,7 @@ body:             |
     liveins: $p0, $z0
 
     ; CHECK-LABEL: name: cmpeq_imm_nxv16i8
-    ; CHECK-NOT: PTEST
+    ; CHECK: PTEST
     %1:zpr = COPY $z0
     %0:ppr_3b = COPY $p0
     %2:ppr = CMPEQ_PPzZI_B %0, %1, 0, implicit-def dead $nzcv

diff  --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpge.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpge.ll
index a99ded97a440..5cf6e205fa7c 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpge.ll
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpge.ll
@@ -37,7 +37,9 @@ define i32 @cmpge_nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale
 define i32 @cmpge_imm_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
 ; CHECK-LABEL: cmpge_imm_nxv16i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p1.b
 ; CHECK-NEXT:    cmpge p0.b, p0/z, z0.b, #0
+; CHECK-NEXT:    ptest p1, p0.b
 ; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret
   %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer)

diff  --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpgt.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpgt.ll
index 09878914b24b..927137c7c29f 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpgt.ll
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpgt.ll
@@ -37,7 +37,9 @@ define i32 @cmpgt_nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale
 define i32 @cmpgt_imm_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
 ; CHECK-LABEL: cmpgt_imm_nxv16i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p1.b
 ; CHECK-NEXT:    cmpgt p0.b, p0/z, z0.b, #0
+; CHECK-NEXT:    ptest p1, p0.b
 ; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret
   %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpgt.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer)

diff  --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphi.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphi.ll
index 6fca640bda2e..f8ff3e8637e5 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphi.ll
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphi.ll
@@ -38,7 +38,9 @@ define i32 @cmphi_nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale
 define i32 @cmphi_imm_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
 ; CHECK-LABEL: cmphi_imm_nxv16i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p1.b
 ; CHECK-NEXT:    cmphi p0.b, p0/z, z0.b, #0
+; CHECK-NEXT:    ptest p1, p0.b
 ; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret
   %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmphi.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer)

diff  --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphs.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphs.ll
index 3b2f679dd2fb..a53becc5c49d 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphs.ll
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmphs.ll
@@ -37,7 +37,9 @@ define i32 @cmphs_nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale
 define i32 @cmphs_imm_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
 ; CHECK-LABEL: cmphs_imm_nxv16i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p1.b
 ; CHECK-NEXT:    cmphs p0.b, p0/z, z0.b, #0
+; CHECK-NEXT:    ptest p1, p0.b
 ; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret
   %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmphs.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer)

diff  --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmple.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmple.ll
index d2b0be299f9e..651b6f09b314 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmple.ll
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmple.ll
@@ -8,7 +8,9 @@
 define i32 @cmple_imm_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
 ; CHECK-LABEL: cmple_imm_nxv16i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p1.b
 ; CHECK-NEXT:    cmple p0.b, p0/z, z0.b, #0
+; CHECK-NEXT:    ptest p1, p0.b
 ; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret
   %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> %a)
@@ -292,7 +294,9 @@ define i1 @cmp32_ptest_any_xx(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vs
 define i1 @cmp8_ptest_first_ax(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: cmp8_ptest_first_ax:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p1.b
 ; CHECK-NEXT:    cmpge p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT:    ptest p1, p0.b
 ; CHECK-NEXT:    cset w0, mi
 ; CHECK-NEXT:    ret
   %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
@@ -308,7 +312,9 @@ define i1 @cmp8_ptest_first_ax(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <v
 define i1 @cmp8_ptest_last_ax(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: cmp8_ptest_last_ax:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p1.b
 ; CHECK-NEXT:    cmpge p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT:    ptest p1, p0.b
 ; CHECK-NEXT:    cset w0, lo
 ; CHECK-NEXT:    ret
   %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
@@ -323,7 +329,9 @@ define i1 @cmp8_ptest_last_ax(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vs
 define i1 @cmp8_ptest_any_ax(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: cmp8_ptest_any_ax:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p1.b
 ; CHECK-NEXT:    cmpge p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT:    ptest p1, p0.b
 ; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret
   %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
@@ -339,7 +347,9 @@ define i1 @cmp8_ptest_any_ax(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vsc
 define i1 @cmp32_ptest_first_ax(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: cmp32_ptest_first_ax:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p1.s
 ; CHECK-NEXT:    cmpge p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT:    ptest p1, p0.b
 ; CHECK-NEXT:    cset w0, mi
 ; CHECK-NEXT:    ret
   %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
@@ -356,7 +366,9 @@ define i1 @cmp32_ptest_first_ax(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <
 define i1 @cmp32_ptest_last_ax(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: cmp32_ptest_last_ax:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p1.s
 ; CHECK-NEXT:    cmpge p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT:    ptest p1, p0.b
 ; CHECK-NEXT:    cset w0, lo
 ; CHECK-NEXT:    ret
   %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
@@ -372,7 +384,9 @@ define i1 @cmp32_ptest_last_ax(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <v
 define i1 @cmp32_ptest_any_ax(<vscale x 16 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: cmp32_ptest_any_ax:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p1.s
 ; CHECK-NEXT:    cmpge p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT:    ptest p1, p0.b
 ; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret
   %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)

diff  --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplo.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplo.ll
index c813acfd3652..32d10b2a6043 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplo.ll
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplo.ll
@@ -8,7 +8,9 @@
 define i32 @cmplo_imm_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
 ; CHECK-LABEL: cmplo_imm_nxv16i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p1.b
 ; CHECK-NEXT:    cmplo p0.b, p0/z, z0.b, #0
+; CHECK-NEXT:    ptest p1, p0.b
 ; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret
   %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmphi.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> %a)

diff  --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpls.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpls.ll
index 53cd251f97eb..73f07e9ca02c 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpls.ll
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpls.ll
@@ -8,7 +8,9 @@
 define i32 @cmpls_imm_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
 ; CHECK-LABEL: cmpls_imm_nxv16i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p1.b
 ; CHECK-NEXT:    cmpls p0.b, p0/z, z0.b, #0
+; CHECK-NEXT:    ptest p1, p0.b
 ; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret
   %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmphs.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> %a)

diff  --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplt.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplt.ll
index 5b8e739e6b30..efcaddb4b8fc 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplt.ll
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmplt.ll
@@ -8,7 +8,9 @@
 define i32 @cmplt_imm_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
 ; CHECK-LABEL: cmplt_imm_nxv16i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p1.b
 ; CHECK-NEXT:    cmplt p0.b, p0/z, z0.b, #0
+; CHECK-NEXT:    ptest p1, p0.b
 ; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret
   %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpgt.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> %a)

diff  --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpne.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpne.ll
index 12c82a0ebf7a..11c828a8291c 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpne.ll
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpne.ll
@@ -37,7 +37,9 @@ define i32 @cmpne_nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale
 define i32 @cmpne_imm_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
 ; CHECK-LABEL: cmpne_imm_nxv16i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p1.b
 ; CHECK-NEXT:    cmpne p0.b, p0/z, z0.b, #0
+; CHECK-NEXT:    ptest p1, p0.b
 ; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret
   %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpne.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer)

diff  --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-match.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-match.ll
index edd87e6a5a5b..636379c34836 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-match.ll
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-match.ll
@@ -20,7 +20,9 @@ define i32 @match_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale
 define i32 @match_imm_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: match_imm_nxv16i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p1.b
 ; CHECK-NEXT:    match p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT:    ptest p1, p0.b
 ; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret
   %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.match.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
@@ -49,7 +51,9 @@ define i32 @nmatch_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscal
 define i32 @nmatch_imm_nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: nmatch_imm_nxv16i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p1.b
 ; CHECK-NEXT:    nmatch p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT:    ptest p1, p0.b
 ; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret
   %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.nmatch.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)


        


More information about the llvm-commits mailing list