[llvm] [AArch64][SVE2p1] Remove redundant PTESTs when predicate is a WHILEcc_x2 (PR #156478)

Kerry McLaughlin via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 9 07:57:37 PDT 2025


https://github.com/kmclaughlin-arm updated https://github.com/llvm/llvm-project/pull/156478

>From 18964b8eb07ec45f290fbda54d7a334dd93af19f Mon Sep 17 00:00:00 2001
From: Kerry McLaughlin <kerry.mclaughlin at arm.com>
Date: Tue, 2 Sep 2025 14:11:44 +0000
Subject: [PATCH 1/8] - Tests for whilecc_x2 with ptest

---
 .../AArch64/sve-ptest-removal-whilege.mir     | 50 +++++++++++++++++++
 .../AArch64/sve-ptest-removal-whilegt.mir     | 50 +++++++++++++++++++
 .../AArch64/sve-ptest-removal-whilehi.mir     | 50 +++++++++++++++++++
 .../AArch64/sve-ptest-removal-whilehs.mir     | 50 +++++++++++++++++++
 .../AArch64/sve-ptest-removal-whilele.mir     | 50 +++++++++++++++++++
 .../AArch64/sve-ptest-removal-whilelo.mir     | 50 +++++++++++++++++++
 .../AArch64/sve-ptest-removal-whilels.mir     | 50 +++++++++++++++++++
 .../AArch64/sve-ptest-removal-whilelt.mir     | 50 +++++++++++++++++++
 8 files changed, 400 insertions(+)

diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilege.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilege.mir
index 69a2c88d7dbad..441dedc6e9d01 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilege.mir
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilege.mir
@@ -538,3 +538,53 @@ body:             |
     RET_ReallyLR implicit $w0
 
 ...
+
+# WHILEGE (predicate pair)
+---
+name:            whilege_x2_b64_s64
+alignment:       2
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: gpr64 }
+  - { id: 1, class: gpr64 }
+  - { id: 2, class: ppr }
+  - { id: 3, class: ppr2mul2 }
+  - { id: 4, class: ppr }
+  - { id: 5, class: ppr }
+  - { id: 6, class: gpr32 }
+  - { id: 7, class: gpr32 }
+liveins:
+  - { reg: '$x0', virtual-reg: '%0' }
+  - { reg: '$x1', virtual-reg: '%1' }
+frameInfo:
+  maxCallFrameSize: 0
+body:             |
+  bb.0.entry:
+    liveins: $x0, $x1
+
+    ; CHECK-LABEL: name: whilege_x2_b64_s64
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+    ; CHECK-NEXT: [[PTRUE_D:%[0-9]+]]:ppr = PTRUE_D 31, implicit $vg
+    ; CHECK-NEXT: [[WHILEGE_2PXX_D:%[0-9]+]]:ppr2mul2 = WHILEGE_2PXX_D [[COPY]], [[COPY1]], implicit-def $nzcv
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ppr = COPY [[WHILEGE_2PXX_D]].psub0
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ppr = COPY [[WHILEGE_2PXX_D]].psub1
+    ; CHECK-NEXT: PTEST_PP killed [[PTRUE_D]], killed [[COPY2]], implicit-def $nzcv
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr
+    ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr [[COPY4]], $wzr, 0, implicit $nzcv
+    ; CHECK-NEXT: $w0 = COPY [[CSINCWr]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %0:gpr64 = COPY $x0
+    %1:gpr64 = COPY $x1
+    %2:ppr = PTRUE_D 31, implicit $vg
+    %3:ppr2mul2 = WHILEGE_2PXX_D %0, %1, implicit-def $nzcv
+    %4:ppr = COPY %3.psub0
+    %5:ppr = COPY %3.psub1
+    PTEST_PP killed %2, killed %4, implicit-def $nzcv
+    %6:gpr32 = COPY $wzr
+    %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
+    $w0 = COPY %7
+    RET_ReallyLR implicit $w0
+...
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilegt.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilegt.mir
index 58db85aba80ad..305347b60309f 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilegt.mir
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilegt.mir
@@ -578,3 +578,53 @@ body:             |
     RET_ReallyLR implicit $w0
 
 ...
+
+# WHILEGT (predicate pair)
+---
+name:            whilegt_x2_b64_s64
+alignment:       2
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: gpr64 }
+  - { id: 1, class: gpr64 }
+  - { id: 2, class: ppr }
+  - { id: 3, class: ppr2mul2 }
+  - { id: 4, class: ppr }
+  - { id: 5, class: ppr }
+  - { id: 6, class: gpr32 }
+  - { id: 7, class: gpr32 }
+liveins:
+  - { reg: '$x0', virtual-reg: '%0' }
+  - { reg: '$x1', virtual-reg: '%1' }
+frameInfo:
+  maxCallFrameSize: 0
+body:             |
+  bb.0.entry:
+    liveins: $x0, $x1
+
+    ; CHECK-LABEL: name: whilegt_x2_b64_s64
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+    ; CHECK-NEXT: [[PTRUE_D:%[0-9]+]]:ppr = PTRUE_D 31, implicit $vg
+    ; CHECK-NEXT: [[WHILEGT_2PXX_D:%[0-9]+]]:ppr2mul2 = WHILEGT_2PXX_D [[COPY]], [[COPY1]], implicit-def $nzcv
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ppr = COPY [[WHILEGT_2PXX_D]].psub0
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ppr = COPY [[WHILEGT_2PXX_D]].psub1
+    ; CHECK-NEXT: PTEST_PP killed [[PTRUE_D]], killed [[COPY2]], implicit-def $nzcv
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr
+    ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr [[COPY4]], $wzr, 0, implicit $nzcv
+    ; CHECK-NEXT: $w0 = COPY [[CSINCWr]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %0:gpr64 = COPY $x0
+    %1:gpr64 = COPY $x1
+    %2:ppr = PTRUE_D 31, implicit $vg
+    %3:ppr2mul2 = WHILEGT_2PXX_D %0, %1, implicit-def $nzcv
+    %4:ppr = COPY %3.psub0
+    %5:ppr = COPY %3.psub1
+    PTEST_PP killed %2, killed %4, implicit-def $nzcv
+    %6:gpr32 = COPY $wzr
+    %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
+    $w0 = COPY %7
+    RET_ReallyLR implicit $w0
+...
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilehi.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilehi.mir
index 03d9768258ebc..71195185ecec7 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilehi.mir
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilehi.mir
@@ -538,3 +538,53 @@ body:             |
     RET_ReallyLR implicit $w0
 
 ...
+
+# WHILEHI (predicate pair)
+---
+name:            whilehi_x2_b64_s64
+alignment:       2
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: gpr64 }
+  - { id: 1, class: gpr64 }
+  - { id: 2, class: ppr }
+  - { id: 3, class: ppr2mul2 }
+  - { id: 4, class: ppr }
+  - { id: 5, class: ppr }
+  - { id: 6, class: gpr32 }
+  - { id: 7, class: gpr32 }
+liveins:
+  - { reg: '$x0', virtual-reg: '%0' }
+  - { reg: '$x1', virtual-reg: '%1' }
+frameInfo:
+  maxCallFrameSize: 0
+body:             |
+  bb.0.entry:
+    liveins: $x0, $x1
+
+    ; CHECK-LABEL: name: whilehi_x2_b64_s64
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+    ; CHECK-NEXT: [[PTRUE_D:%[0-9]+]]:ppr = PTRUE_D 31, implicit $vg
+    ; CHECK-NEXT: [[WHILEHI_2PXX_D:%[0-9]+]]:ppr2mul2 = WHILEHI_2PXX_D [[COPY]], [[COPY1]], implicit-def $nzcv
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ppr = COPY [[WHILEHI_2PXX_D]].psub0
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ppr = COPY [[WHILEHI_2PXX_D]].psub1
+    ; CHECK-NEXT: PTEST_PP killed [[PTRUE_D]], killed [[COPY2]], implicit-def $nzcv
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr
+    ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr [[COPY4]], $wzr, 0, implicit $nzcv
+    ; CHECK-NEXT: $w0 = COPY [[CSINCWr]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %0:gpr64 = COPY $x0
+    %1:gpr64 = COPY $x1
+    %2:ppr = PTRUE_D 31, implicit $vg
+    %3:ppr2mul2 = WHILEHI_2PXX_D %0, %1, implicit-def $nzcv
+    %4:ppr = COPY %3.psub0
+    %5:ppr = COPY %3.psub1
+    PTEST_PP killed %2, killed %4, implicit-def $nzcv
+    %6:gpr32 = COPY $wzr
+    %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
+    $w0 = COPY %7
+    RET_ReallyLR implicit $w0
+...
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilehs.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilehs.mir
index 68ecd79c8325b..e8f94857ae958 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilehs.mir
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilehs.mir
@@ -538,3 +538,53 @@ body:             |
     RET_ReallyLR implicit $w0
 
 ...
+
+# WHILEHS (predicate pair)
+---
+name:            whilehs_x2_b64_s64
+alignment:       2
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: gpr64 }
+  - { id: 1, class: gpr64 }
+  - { id: 2, class: ppr }
+  - { id: 3, class: ppr2mul2 }
+  - { id: 4, class: ppr }
+  - { id: 5, class: ppr }
+  - { id: 6, class: gpr32 }
+  - { id: 7, class: gpr32 }
+liveins:
+  - { reg: '$x0', virtual-reg: '%0' }
+  - { reg: '$x1', virtual-reg: '%1' }
+frameInfo:
+  maxCallFrameSize: 0
+body:             |
+  bb.0.entry:
+    liveins: $x0, $x1
+
+    ; CHECK-LABEL: name: whilehs_x2_b64_s64
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+    ; CHECK-NEXT: [[PTRUE_D:%[0-9]+]]:ppr = PTRUE_D 31, implicit $vg
+    ; CHECK-NEXT: [[WHILEHS_2PXX_D:%[0-9]+]]:ppr2mul2 = WHILEHS_2PXX_D [[COPY]], [[COPY1]], implicit-def $nzcv
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ppr = COPY [[WHILEHS_2PXX_D]].psub0
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ppr = COPY [[WHILEHS_2PXX_D]].psub1
+    ; CHECK-NEXT: PTEST_PP killed [[PTRUE_D]], killed [[COPY2]], implicit-def $nzcv
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr
+    ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr [[COPY4]], $wzr, 0, implicit $nzcv
+    ; CHECK-NEXT: $w0 = COPY [[CSINCWr]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %0:gpr64 = COPY $x0
+    %1:gpr64 = COPY $x1
+    %2:ppr = PTRUE_D 31, implicit $vg
+    %3:ppr2mul2 = WHILEHS_2PXX_D %0, %1, implicit-def $nzcv
+    %4:ppr = COPY %3.psub0
+    %5:ppr = COPY %3.psub1
+    PTEST_PP killed %2, killed %4, implicit-def $nzcv
+    %6:gpr32 = COPY $wzr
+    %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
+    $w0 = COPY %7
+    RET_ReallyLR implicit $w0
+...
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilele.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilele.mir
index 16dcb2cebec7e..44e4d8ad0c239 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilele.mir
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilele.mir
@@ -538,3 +538,53 @@ body:             |
     RET_ReallyLR implicit $w0
 
 ...
+
+# WHILELE (predicate pair)
+---
+name:            whilele_x2_b64_s64
+alignment:       2
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: gpr64 }
+  - { id: 1, class: gpr64 }
+  - { id: 2, class: ppr }
+  - { id: 3, class: ppr2mul2 }
+  - { id: 4, class: ppr }
+  - { id: 5, class: ppr }
+  - { id: 6, class: gpr32 }
+  - { id: 7, class: gpr32 }
+liveins:
+  - { reg: '$x0', virtual-reg: '%0' }
+  - { reg: '$x1', virtual-reg: '%1' }
+frameInfo:
+  maxCallFrameSize: 0
+body:             |
+  bb.0.entry:
+    liveins: $x0, $x1
+
+    ; CHECK-LABEL: name: whilele_x2_b64_s64
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+    ; CHECK-NEXT: [[PTRUE_D:%[0-9]+]]:ppr = PTRUE_D 31, implicit $vg
+    ; CHECK-NEXT: [[WHILELE_2PXX_D:%[0-9]+]]:ppr2mul2 = WHILELE_2PXX_D [[COPY]], [[COPY1]], implicit-def $nzcv
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ppr = COPY [[WHILELE_2PXX_D]].psub0
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ppr = COPY [[WHILELE_2PXX_D]].psub1
+    ; CHECK-NEXT: PTEST_PP killed [[PTRUE_D]], killed [[COPY2]], implicit-def $nzcv
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr
+    ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr [[COPY4]], $wzr, 0, implicit $nzcv
+    ; CHECK-NEXT: $w0 = COPY [[CSINCWr]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %0:gpr64 = COPY $x0
+    %1:gpr64 = COPY $x1
+    %2:ppr = PTRUE_D 31, implicit $vg
+    %3:ppr2mul2 = WHILELE_2PXX_D %0, %1, implicit-def $nzcv
+    %4:ppr = COPY %3.psub0
+    %5:ppr = COPY %3.psub1
+    PTEST_PP killed %2, killed %4, implicit-def $nzcv
+    %6:gpr32 = COPY $wzr
+    %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
+    $w0 = COPY %7
+    RET_ReallyLR implicit $w0
+...
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilelo.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilelo.mir
index 06030a786545a..3df9cbda0d187 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilelo.mir
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilelo.mir
@@ -538,3 +538,53 @@ body:             |
     RET_ReallyLR implicit $w0
 
 ...
+
+# WHILELO (predicate pair)
+---
+name:            whilelo_x2_b64_s64
+alignment:       2
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: gpr64 }
+  - { id: 1, class: gpr64 }
+  - { id: 2, class: ppr }
+  - { id: 3, class: ppr2mul2 }
+  - { id: 4, class: ppr }
+  - { id: 5, class: ppr }
+  - { id: 6, class: gpr32 }
+  - { id: 7, class: gpr32 }
+liveins:
+  - { reg: '$x0', virtual-reg: '%0' }
+  - { reg: '$x1', virtual-reg: '%1' }
+frameInfo:
+  maxCallFrameSize: 0
+body:             |
+  bb.0.entry:
+    liveins: $x0, $x1
+
+    ; CHECK-LABEL: name: whilelo_x2_b64_s64
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+    ; CHECK-NEXT: [[PTRUE_D:%[0-9]+]]:ppr = PTRUE_D 31, implicit $vg
+    ; CHECK-NEXT: [[WHILELO_2PXX_D:%[0-9]+]]:ppr2mul2 = WHILELO_2PXX_D [[COPY]], [[COPY1]], implicit-def $nzcv
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ppr = COPY [[WHILELO_2PXX_D]].psub0
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ppr = COPY [[WHILELO_2PXX_D]].psub1
+    ; CHECK-NEXT: PTEST_PP killed [[PTRUE_D]], killed [[COPY2]], implicit-def $nzcv
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr
+    ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr [[COPY4]], $wzr, 0, implicit $nzcv
+    ; CHECK-NEXT: $w0 = COPY [[CSINCWr]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %0:gpr64 = COPY $x0
+    %1:gpr64 = COPY $x1
+    %2:ppr = PTRUE_D 31, implicit $vg
+    %3:ppr2mul2 = WHILELO_2PXX_D %0, %1, implicit-def $nzcv
+    %4:ppr = COPY %3.psub0
+    %5:ppr = COPY %3.psub1
+    PTEST_PP killed %2, killed %4, implicit-def $nzcv
+    %6:gpr32 = COPY $wzr
+    %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
+    $w0 = COPY %7
+    RET_ReallyLR implicit $w0
+...
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilels.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilels.mir
index 9b378a83e917e..1dae74fdf9a2c 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilels.mir
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilels.mir
@@ -538,3 +538,53 @@ body:             |
     RET_ReallyLR implicit $w0
 
 ...
+
+# WHILELS (predicate pair)
+---
+name:            whilels_x2_b64_s64
+alignment:       2
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: gpr64 }
+  - { id: 1, class: gpr64 }
+  - { id: 2, class: ppr }
+  - { id: 3, class: ppr2mul2 }
+  - { id: 4, class: ppr }
+  - { id: 5, class: ppr }
+  - { id: 6, class: gpr32 }
+  - { id: 7, class: gpr32 }
+liveins:
+  - { reg: '$x0', virtual-reg: '%0' }
+  - { reg: '$x1', virtual-reg: '%1' }
+frameInfo:
+  maxCallFrameSize: 0
+body:             |
+  bb.0.entry:
+    liveins: $x0, $x1
+
+    ; CHECK-LABEL: name: whilels_x2_b64_s64
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+    ; CHECK-NEXT: [[PTRUE_D:%[0-9]+]]:ppr = PTRUE_D 31, implicit $vg
+    ; CHECK-NEXT: [[WHILELS_2PXX_D:%[0-9]+]]:ppr2mul2 = WHILELS_2PXX_D [[COPY]], [[COPY1]], implicit-def $nzcv
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ppr = COPY [[WHILELS_2PXX_D]].psub0
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ppr = COPY [[WHILELS_2PXX_D]].psub1
+    ; CHECK-NEXT: PTEST_PP killed [[PTRUE_D]], killed [[COPY2]], implicit-def $nzcv
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr
+    ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr [[COPY4]], $wzr, 0, implicit $nzcv
+    ; CHECK-NEXT: $w0 = COPY [[CSINCWr]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %0:gpr64 = COPY $x0
+    %1:gpr64 = COPY $x1
+    %2:ppr = PTRUE_D 31, implicit $vg
+    %3:ppr2mul2 = WHILELS_2PXX_D %0, %1, implicit-def $nzcv
+    %4:ppr = COPY %3.psub0
+    %5:ppr = COPY %3.psub1
+    PTEST_PP killed %2, killed %4, implicit-def $nzcv
+    %6:gpr32 = COPY $wzr
+    %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
+    $w0 = COPY %7
+    RET_ReallyLR implicit $w0
+...
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilelt.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilelt.mir
index ef88a8dd848b0..b55aa471cbba9 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilelt.mir
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilelt.mir
@@ -538,3 +538,53 @@ body:             |
     RET_ReallyLR implicit $w0
 
 ...
+
+# WHILELT (predicate pair)
+---
+name:            whilelt_x2_b64_s64
+alignment:       2
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: gpr64 }
+  - { id: 1, class: gpr64 }
+  - { id: 2, class: ppr }
+  - { id: 3, class: ppr2mul2 }
+  - { id: 4, class: ppr }
+  - { id: 5, class: ppr }
+  - { id: 6, class: gpr32 }
+  - { id: 7, class: gpr32 }
+liveins:
+  - { reg: '$x0', virtual-reg: '%0' }
+  - { reg: '$x1', virtual-reg: '%1' }
+frameInfo:
+  maxCallFrameSize: 0
+body:             |
+  bb.0.entry:
+    liveins: $x0, $x1
+
+    ; CHECK-LABEL: name: whilelt_x2_b64_s64
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+    ; CHECK-NEXT: [[PTRUE_D:%[0-9]+]]:ppr = PTRUE_D 31, implicit $vg
+    ; CHECK-NEXT: [[WHILELT_2PXX_D:%[0-9]+]]:ppr2mul2 = WHILELT_2PXX_D [[COPY]], [[COPY1]], implicit-def $nzcv
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ppr = COPY [[WHILELT_2PXX_D]].psub0
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ppr = COPY [[WHILELT_2PXX_D]].psub1
+    ; CHECK-NEXT: PTEST_PP killed [[PTRUE_D]], killed [[COPY2]], implicit-def $nzcv
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr
+    ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr [[COPY4]], $wzr, 0, implicit $nzcv
+    ; CHECK-NEXT: $w0 = COPY [[CSINCWr]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %0:gpr64 = COPY $x0
+    %1:gpr64 = COPY $x1
+    %2:ppr = PTRUE_D 31, implicit $vg
+    %3:ppr2mul2 = WHILELT_2PXX_D %0, %1, implicit-def $nzcv
+    %4:ppr = COPY %3.psub0
+    %5:ppr = COPY %3.psub1
+    PTEST_PP killed %2, killed %4, implicit-def $nzcv
+    %6:gpr32 = COPY $wzr
+    %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
+    $w0 = COPY %7
+    RET_ReallyLR implicit $w0
+...

>From b86deb096d559c3d4ee77c9eddb572e5f76bb22a Mon Sep 17 00:00:00 2001
From: Kerry McLaughlin <kerry.mclaughlin at arm.com>
Date: Mon, 1 Sep 2025 10:15:11 +0000
Subject: [PATCH 2/8] [AArch64][SVE2p1] Remove redundant PTESTs when predicate
 is a WHILEcc_x2

The optimisation in canRemovePTestInstr removes a PTEST instruction when the
predicate it tests is the result of a WHILEcc, since WHILEcc already sets NZCV.
This patch extends that support to WHILEcc (predicate pair) by:
 - Including the WHILEcc_x2 intrinsics in isPredicateCCSettingOp, allowing
   performFirstTrueTestVectorCombine to create the PTEST.
 - Setting the isWhile flag and ElementSize for the predicate-pair
   instructions in tablegen.
 - Looking through the subregister COPY of the predicate in
   canRemovePTestInstr and applying isWhileOpcode to the instruction that
   defines the copied register.
---
 .../Target/AArch64/AArch64ISelLowering.cpp    | 10 +++++++-
 llvm/lib/Target/AArch64/AArch64InstrInfo.cpp  | 18 ++++++++++++--
 .../lib/Target/AArch64/AArch64RegisterInfo.td | 24 +++++++++----------
 llvm/lib/Target/AArch64/SVEInstrFormats.td    |  4 +++-
 .../AArch64/sve-ptest-removal-whilege.mir     |  1 -
 .../AArch64/sve-ptest-removal-whilegt.mir     |  1 -
 .../AArch64/sve-ptest-removal-whilehi.mir     |  1 -
 .../AArch64/sve-ptest-removal-whilehs.mir     |  1 -
 .../AArch64/sve-ptest-removal-whilele.mir     |  1 -
 .../AArch64/sve-ptest-removal-whilelo.mir     |  1 -
 .../AArch64/sve-ptest-removal-whilels.mir     |  1 -
 .../AArch64/sve-ptest-removal-whilelt.mir     |  1 -
 12 files changed, 39 insertions(+), 25 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 5ffaf2c49b4c0..ecc8c739a98b8 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -20195,13 +20195,21 @@ static bool isPredicateCCSettingOp(SDValue N) {
       (N.getOpcode() == ISD::GET_ACTIVE_LANE_MASK) ||
       (N.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
        (N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilege ||
+       (N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilege_x2 ||
         N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilegt ||
+        N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilegt_x2 ||
         N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilehi ||
+        N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilehi_x2 ||
         N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilehs ||
+        N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilehs_x2 ||
         N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilele ||
+        N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilele_x2 ||
         N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilelo ||
+        N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilelo_x2 ||
         N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilels ||
-        N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilelt)))
+        N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilels_x2 ||
+        N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilelt ||
+        N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilelt_x2))))
     return true;
 
   return false;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index b47ae5d2cbb17..521766af0af25 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -1488,6 +1488,21 @@ AArch64InstrInfo::canRemovePTestInstr(MachineInstr *PTest, MachineInstr *Mask,
   bool PredIsPTestLike = isPTestLikeOpcode(PredOpcode);
   bool PredIsWhileLike = isWhileOpcode(PredOpcode);
 
+  uint64_t PredEltSize = 0;
+  if (PredIsWhileLike)
+    PredEltSize = getElementSizeForOpcode(PredOpcode);
+
+  if (Pred->isCopy()) {
+    // Instructions which return a multi-vector (e.g. WHILECC_x2) require copies
+    // before the branch to extract each subregister.
+    auto Op = Pred->getOperand(1);
+    if (Op.isReg() && Op.getReg().isVirtual() && Op.getSubReg() != 0) {
+      MachineInstr *DefMI = MRI->getVRegDef(Op.getReg());
+      PredIsWhileLike = isWhileOpcode(DefMI->getOpcode());
+      PredEltSize = getElementSizeForOpcode(DefMI->getOpcode());
+    }
+  }
+
   if (PredIsWhileLike) {
     // For PTEST(PG, PG), PTEST is redundant when PG is the result of a WHILEcc
     // instruction and the condition is "any" since WHILcc does an implicit
@@ -1499,8 +1514,7 @@ AArch64InstrInfo::canRemovePTestInstr(MachineInstr *PTest, MachineInstr *Mask,
     // redundant since WHILE performs an implicit PTEST with an all active
     // mask.
     if (isPTrueOpcode(MaskOpcode) && Mask->getOperand(1).getImm() == 31 &&
-        getElementSizeForOpcode(MaskOpcode) ==
-            getElementSizeForOpcode(PredOpcode))
+        getElementSizeForOpcode(MaskOpcode) == PredEltSize)
       return PredOpcode;
 
     return {};
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
index 1a7609bfee8a1..47156d8d1a4b7 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
@@ -1164,23 +1164,21 @@ class PPRVectorListMul<int ElementWidth, int NumRegs> : PPRVectorList<ElementWid
                                                                 ", AArch64::PPRMul2RegClassID>";
 }
 
+class PPR2MulRegOp<string Suffix, int Size, ElementSizeEnum ES> :
+      RegisterOperand<PPR2Mul2, "printTypedVectorList<0,'" # Suffix # "'>"> {
+  ElementSizeEnum ElementSize;
+  let ElementSize = ES;
+  let ParserMatchClass = PPRVectorListMul<Size, 2>;
+}
+
 let EncoderMethod = "EncodeRegMul_MinMax<2, 0, 14>",
     DecoderMethod = "DecodePPR2Mul2RegisterClass" in {
-  def PP_b_mul_r : RegisterOperand<PPR2Mul2, "printTypedVectorList<0,'b'>"> {
-    let ParserMatchClass = PPRVectorListMul<8, 2>;
-  }
 
-  def PP_h_mul_r : RegisterOperand<PPR2Mul2, "printTypedVectorList<0,'h'>"> {
-    let ParserMatchClass = PPRVectorListMul<16, 2>;
-  }
-
-  def PP_s_mul_r : RegisterOperand<PPR2Mul2, "printTypedVectorList<0,'s'>"> {
-    let ParserMatchClass = PPRVectorListMul<32, 2>;
-  }
+  def PP_b_mul_r : PPR2MulRegOp<"b", 8,  ElementSizeB>;
+  def PP_h_mul_r : PPR2MulRegOp<"h", 16, ElementSizeH>;
+  def PP_s_mul_r : PPR2MulRegOp<"s", 32, ElementSizeS>;
+  def PP_d_mul_r : PPR2MulRegOp<"d", 64, ElementSizeD>;
 
-  def PP_d_mul_r : RegisterOperand<PPR2Mul2, "printTypedVectorList<0,'d'>"> {
-    let ParserMatchClass = PPRVectorListMul<64, 2>;
-  }
 }  // end let EncoderMethod/DecoderMethod
 
 
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index f8c1fe81c6783..166219de9dfe9 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -10405,7 +10405,7 @@ multiclass sve2p1_int_while_rr_pn<string mnemonic, bits<3> opc> {
 
 // SVE integer compare scalar count and limit (predicate pair)
 class sve2p1_int_while_rr_pair<string mnemonic, bits<2> sz, bits<3> opc,
-                             RegisterOperand ppr_ty>
+                               PPR2MulRegOp ppr_ty>
     : I<(outs ppr_ty:$Pd), (ins GPR64:$Rn, GPR64:$Rm),
         mnemonic, "\t$Pd, $Rn, $Rm",
         "", []>, Sched<[]> {
@@ -10425,6 +10425,8 @@ class sve2p1_int_while_rr_pair<string mnemonic, bits<2> sz, bits<3> opc,
 
   let Defs = [NZCV];
   let hasSideEffects = 0;
+  let ElementSize = ppr_ty.ElementSize;
+  let isWhile = 1;
 }
 
 
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilege.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilege.mir
index 441dedc6e9d01..d3a1be3de17fb 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilege.mir
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilege.mir
@@ -571,7 +571,6 @@ body:             |
     ; CHECK-NEXT: [[WHILEGE_2PXX_D:%[0-9]+]]:ppr2mul2 = WHILEGE_2PXX_D [[COPY]], [[COPY1]], implicit-def $nzcv
     ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ppr = COPY [[WHILEGE_2PXX_D]].psub0
     ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ppr = COPY [[WHILEGE_2PXX_D]].psub1
-    ; CHECK-NEXT: PTEST_PP killed [[PTRUE_D]], killed [[COPY2]], implicit-def $nzcv
     ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr
     ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr [[COPY4]], $wzr, 0, implicit $nzcv
     ; CHECK-NEXT: $w0 = COPY [[CSINCWr]]
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilegt.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilegt.mir
index 305347b60309f..fb92955f02d52 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilegt.mir
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilegt.mir
@@ -611,7 +611,6 @@ body:             |
     ; CHECK-NEXT: [[WHILEGT_2PXX_D:%[0-9]+]]:ppr2mul2 = WHILEGT_2PXX_D [[COPY]], [[COPY1]], implicit-def $nzcv
     ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ppr = COPY [[WHILEGT_2PXX_D]].psub0
     ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ppr = COPY [[WHILEGT_2PXX_D]].psub1
-    ; CHECK-NEXT: PTEST_PP killed [[PTRUE_D]], killed [[COPY2]], implicit-def $nzcv
     ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr
     ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr [[COPY4]], $wzr, 0, implicit $nzcv
     ; CHECK-NEXT: $w0 = COPY [[CSINCWr]]
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilehi.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilehi.mir
index 71195185ecec7..97f242b852eb8 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilehi.mir
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilehi.mir
@@ -571,7 +571,6 @@ body:             |
     ; CHECK-NEXT: [[WHILEHI_2PXX_D:%[0-9]+]]:ppr2mul2 = WHILEHI_2PXX_D [[COPY]], [[COPY1]], implicit-def $nzcv
     ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ppr = COPY [[WHILEHI_2PXX_D]].psub0
     ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ppr = COPY [[WHILEHI_2PXX_D]].psub1
-    ; CHECK-NEXT: PTEST_PP killed [[PTRUE_D]], killed [[COPY2]], implicit-def $nzcv
     ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr
     ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr [[COPY4]], $wzr, 0, implicit $nzcv
     ; CHECK-NEXT: $w0 = COPY [[CSINCWr]]
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilehs.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilehs.mir
index e8f94857ae958..0ec4788957335 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilehs.mir
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilehs.mir
@@ -571,7 +571,6 @@ body:             |
     ; CHECK-NEXT: [[WHILEHS_2PXX_D:%[0-9]+]]:ppr2mul2 = WHILEHS_2PXX_D [[COPY]], [[COPY1]], implicit-def $nzcv
     ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ppr = COPY [[WHILEHS_2PXX_D]].psub0
     ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ppr = COPY [[WHILEHS_2PXX_D]].psub1
-    ; CHECK-NEXT: PTEST_PP killed [[PTRUE_D]], killed [[COPY2]], implicit-def $nzcv
     ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr
     ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr [[COPY4]], $wzr, 0, implicit $nzcv
     ; CHECK-NEXT: $w0 = COPY [[CSINCWr]]
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilele.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilele.mir
index 44e4d8ad0c239..a4cb32bdea624 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilele.mir
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilele.mir
@@ -571,7 +571,6 @@ body:             |
     ; CHECK-NEXT: [[WHILELE_2PXX_D:%[0-9]+]]:ppr2mul2 = WHILELE_2PXX_D [[COPY]], [[COPY1]], implicit-def $nzcv
     ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ppr = COPY [[WHILELE_2PXX_D]].psub0
     ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ppr = COPY [[WHILELE_2PXX_D]].psub1
-    ; CHECK-NEXT: PTEST_PP killed [[PTRUE_D]], killed [[COPY2]], implicit-def $nzcv
     ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr
     ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr [[COPY4]], $wzr, 0, implicit $nzcv
     ; CHECK-NEXT: $w0 = COPY [[CSINCWr]]
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilelo.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilelo.mir
index 3df9cbda0d187..fdddf50832bb3 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilelo.mir
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilelo.mir
@@ -571,7 +571,6 @@ body:             |
     ; CHECK-NEXT: [[WHILELO_2PXX_D:%[0-9]+]]:ppr2mul2 = WHILELO_2PXX_D [[COPY]], [[COPY1]], implicit-def $nzcv
     ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ppr = COPY [[WHILELO_2PXX_D]].psub0
     ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ppr = COPY [[WHILELO_2PXX_D]].psub1
-    ; CHECK-NEXT: PTEST_PP killed [[PTRUE_D]], killed [[COPY2]], implicit-def $nzcv
     ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr
     ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr [[COPY4]], $wzr, 0, implicit $nzcv
     ; CHECK-NEXT: $w0 = COPY [[CSINCWr]]
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilels.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilels.mir
index 1dae74fdf9a2c..26297e36dceda 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilels.mir
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilels.mir
@@ -571,7 +571,6 @@ body:             |
     ; CHECK-NEXT: [[WHILELS_2PXX_D:%[0-9]+]]:ppr2mul2 = WHILELS_2PXX_D [[COPY]], [[COPY1]], implicit-def $nzcv
     ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ppr = COPY [[WHILELS_2PXX_D]].psub0
     ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ppr = COPY [[WHILELS_2PXX_D]].psub1
-    ; CHECK-NEXT: PTEST_PP killed [[PTRUE_D]], killed [[COPY2]], implicit-def $nzcv
     ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr
     ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr [[COPY4]], $wzr, 0, implicit $nzcv
     ; CHECK-NEXT: $w0 = COPY [[CSINCWr]]
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilelt.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilelt.mir
index b55aa471cbba9..c3bde639702ed 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilelt.mir
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilelt.mir
@@ -571,7 +571,6 @@ body:             |
     ; CHECK-NEXT: [[WHILELT_2PXX_D:%[0-9]+]]:ppr2mul2 = WHILELT_2PXX_D [[COPY]], [[COPY1]], implicit-def $nzcv
     ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ppr = COPY [[WHILELT_2PXX_D]].psub0
     ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ppr = COPY [[WHILELT_2PXX_D]].psub1
-    ; CHECK-NEXT: PTEST_PP killed [[PTRUE_D]], killed [[COPY2]], implicit-def $nzcv
     ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr
     ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr [[COPY4]], $wzr, 0, implicit $nzcv
     ; CHECK-NEXT: $w0 = COPY [[CSINCWr]]

>From 81db79a35f1ff4b35d05dce6ab3f0a2df1c8aace Mon Sep 17 00:00:00 2001
From: Kerry McLaughlin <kerry.mclaughlin at arm.com>
Date: Tue, 2 Sep 2025 15:50:35 +0000
Subject: [PATCH 3/8] - Run clang-format

---
 llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 4 ++--
 llvm/lib/Target/AArch64/AArch64RegisterInfo.td  | 7 +++----
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index ecc8c739a98b8..1c4443e35e3a8 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -20195,7 +20195,7 @@ static bool isPredicateCCSettingOp(SDValue N) {
       (N.getOpcode() == ISD::GET_ACTIVE_LANE_MASK) ||
       (N.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
        (N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilege ||
-       (N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilege_x2 ||
+        N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilege_x2 ||
         N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilegt ||
         N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilegt_x2 ||
         N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilehi ||
@@ -20209,7 +20209,7 @@ static bool isPredicateCCSettingOp(SDValue N) {
         N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilels ||
         N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilels_x2 ||
         N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilelt ||
-        N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilelt_x2))))
+        N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilelt_x2)))
     return true;
 
   return false;
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
index 47156d8d1a4b7..72c303fcbc55b 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
@@ -1164,8 +1164,8 @@ class PPRVectorListMul<int ElementWidth, int NumRegs> : PPRVectorList<ElementWid
                                                                 ", AArch64::PPRMul2RegClassID>";
 }
 
-class PPR2MulRegOp<string Suffix, int Size, ElementSizeEnum ES> :
-      RegisterOperand<PPR2Mul2, "printTypedVectorList<0,'" # Suffix # "'>"> {
+class PPR2MulRegOp<string Suffix, int Size, ElementSizeEnum ES>
+    : RegisterOperand<PPR2Mul2, "printTypedVectorList<0,'"#Suffix#"'>"> {
   ElementSizeEnum ElementSize;
   let ElementSize = ES;
   let ParserMatchClass = PPRVectorListMul<Size, 2>;
@@ -1179,8 +1179,7 @@ let EncoderMethod = "EncodeRegMul_MinMax<2, 0, 14>",
   def PP_s_mul_r : PPR2MulRegOp<"s", 32, ElementSizeS>;
   def PP_d_mul_r : PPR2MulRegOp<"d", 64, ElementSizeD>;
 
-}  // end let EncoderMethod/DecoderMethod
-
+} // end let EncoderMethod/DecoderMethod
 
 //===----------------------------------------------------------------------===//
 // SVE vector register classes

>From 59203bbde99a4cda77ba2542fa53e7e9b3678e34 Mon Sep 17 00:00:00 2001
From: Kerry McLaughlin <kerry.mclaughlin at arm.com>
Date: Wed, 3 Sep 2025 13:08:07 +0000
Subject: [PATCH 4/8] - Add tests for performFirstTrueTestVectorCombine after
 adding predicate pair intrinsics to isPredicateCCSettingOp

---
 llvm/test/CodeGen/AArch64/sve-cmp-folds.ll | 123 ++++++++++++++++++++-
 1 file changed, 122 insertions(+), 1 deletion(-)

diff --git a/llvm/test/CodeGen/AArch64/sve-cmp-folds.ll b/llvm/test/CodeGen/AArch64/sve-cmp-folds.ll
index 981cc88298a3e..bc039b57e9645 100644
--- a/llvm/test/CodeGen/AArch64/sve-cmp-folds.ll
+++ b/llvm/test/CodeGen/AArch64/sve-cmp-folds.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-linux-unknown -mattr=+sve2 -o - < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-unknown -mattr=+sve2p1 -o - < %s | FileCheck %s
 
 define <vscale x 8 x i1> @not_icmp_sle_nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: not_icmp_sle_nxv8i16:
@@ -220,6 +220,118 @@ define i1 @lane_mask_first(i64 %next, i64 %end) {
   ret i1 %bit
 }
 
+define i1 @whilege_x2_first(i64 %next, i64 %end) {
+; CHECK-LABEL: whilege_x2_first:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilege { p0.s, p1.s }, x0, x1
+; CHECK-NEXT:    mov z0.s, p0/z, #1 // =0x1
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    and w0, w8, #0x1
+; CHECK-NEXT:    ret
+  %predpair = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilege.x2.nxv4i1.i64(i64 %next, i64 %end)
+  %predicate = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } %predpair, 0
+  %bit = extractelement <vscale x 4 x i1> %predicate, i64 0
+  ret i1 %bit
+}
+
+define i1 @whilegt_x2_first(i64 %next, i64 %end) {
+; CHECK-LABEL: whilegt_x2_first:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilegt { p0.s, p1.s }, x0, x1
+; CHECK-NEXT:    mov z0.s, p0/z, #1 // =0x1
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    and w0, w8, #0x1
+; CHECK-NEXT:    ret
+  %predpair = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilegt.x2.nxv4i1.i64(i64 %next, i64 %end)
+  %predicate = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } %predpair, 0
+  %bit = extractelement <vscale x 4 x i1> %predicate, i64 0
+  ret i1 %bit
+}
+
+define i1 @whilehi_x2_first(i64 %next, i64 %end) {
+; CHECK-LABEL: whilehi_x2_first:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilehi { p0.s, p1.s }, x0, x1
+; CHECK-NEXT:    mov z0.s, p0/z, #1 // =0x1
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    and w0, w8, #0x1
+; CHECK-NEXT:    ret
+  %predpair = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilehi.x2.nxv4i1.i64(i64 %next, i64 %end)
+  %predicate = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } %predpair, 0
+  %bit = extractelement <vscale x 4 x i1> %predicate, i64 0
+  ret i1 %bit
+}
+
+define i1 @whilehs_x2_first(i64 %next, i64 %end) {
+; CHECK-LABEL: whilehs_x2_first:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilehs { p0.s, p1.s }, x0, x1
+; CHECK-NEXT:    mov z0.s, p0/z, #1 // =0x1
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    and w0, w8, #0x1
+; CHECK-NEXT:    ret
+  %predpair = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilehs.x2.nxv4i1.i64(i64 %next, i64 %end)
+  %predicate = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } %predpair, 0
+  %bit = extractelement <vscale x 4 x i1> %predicate, i64 0
+  ret i1 %bit
+}
+
+define i1 @whilele_x2_first(i64 %next, i64 %end) {
+; CHECK-LABEL: whilele_x2_first:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilele { p0.s, p1.s }, x0, x1
+; CHECK-NEXT:    mov z0.s, p0/z, #1 // =0x1
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    and w0, w8, #0x1
+; CHECK-NEXT:    ret
+  %predpair = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilele.x2.nxv4i1.i64(i64 %next, i64 %end)
+  %predicate = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } %predpair, 0
+  %bit = extractelement <vscale x 4 x i1> %predicate, i64 0
+  ret i1 %bit
+}
+
+define i1 @whilelo_x2_first(i64 %next, i64 %end) {
+; CHECK-LABEL: whilelo_x2_first:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilelo { p0.s, p1.s }, x0, x1
+; CHECK-NEXT:    mov z0.s, p0/z, #1 // =0x1
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    and w0, w8, #0x1
+; CHECK-NEXT:    ret
+  %predpair = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilelo.x2.nxv4i1.i64(i64 %next, i64 %end)
+  %predicate = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } %predpair, 0
+  %bit = extractelement <vscale x 4 x i1> %predicate, i64 0
+  ret i1 %bit
+}
+
+define i1 @whilels_x2_first(i64 %next, i64 %end) {
+; CHECK-LABEL: whilels_x2_first:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilels { p0.s, p1.s }, x0, x1
+; CHECK-NEXT:    mov z0.s, p0/z, #1 // =0x1
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    and w0, w8, #0x1
+; CHECK-NEXT:    ret
+  %predpair = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilels.x2.nxv4i1.i64(i64 %next, i64 %end)
+  %predicate = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } %predpair, 0
+  %bit = extractelement <vscale x 4 x i1> %predicate, i64 0
+  ret i1 %bit
+}
+
+define i1 @whilelt_x2_first(i64 %next, i64 %end) {
+; CHECK-LABEL: whilelt_x2_first:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilelt { p0.s, p1.s }, x0, x1
+; CHECK-NEXT:    mov z0.s, p0/z, #1 // =0x1
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    and w0, w8, #0x1
+; CHECK-NEXT:    ret
+  %predpair = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilelt.x2.nxv4i1.i64(i64 %next, i64 %end)
+  %predicate = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } %predpair, 0
+  %bit = extractelement <vscale x 4 x i1> %predicate, i64 0
+  ret i1 %bit
+}
+
 declare i64 @llvm.vscale.i64()
 declare <vscale x 4 x i1> @llvm.aarch64.sve.whilege.nxv4i1.i64(i64, i64)
 declare <vscale x 4 x i1> @llvm.aarch64.sve.whilegt.nxv4i1.i64(i64, i64)
@@ -230,3 +342,12 @@ declare <vscale x 4 x i1> @llvm.aarch64.sve.whilelo.nxv4i1.i64(i64, i64)
 declare <vscale x 4 x i1> @llvm.aarch64.sve.whilels.nxv4i1.i64(i64, i64)
 declare <vscale x 4 x i1> @llvm.aarch64.sve.whilelt.nxv4i1.i64(i64, i64)
 declare <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64, i64)
+
+declare { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilege.x2.nxv4i1(i64, i64)
+declare { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilegt.x2.nxv4i1(i64, i64)
+declare { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilehi.x2.nxv4i1(i64, i64)
+declare { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilehs.x2.nxv4i1(i64, i64)
+declare { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilele.x2.nxv4i1(i64, i64)
+declare { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilelo.x2.nxv4i1(i64, i64)
+declare { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilels.x2.nxv4i1(i64, i64)
+declare { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilelt.x2.nxv4i1(i64, i64)

>From d7375f9c219783204fa54bed9c4a9c3711de1970 Mon Sep 17 00:00:00 2001
From: Kerry McLaughlin <kerry.mclaughlin at arm.com>
Date: Wed, 3 Sep 2025 13:09:42 +0000
Subject: [PATCH 5/8] - Update check lines in new tests

---
 llvm/test/CodeGen/AArch64/sve-cmp-folds.ll | 32 ++++++----------------
 1 file changed, 8 insertions(+), 24 deletions(-)

diff --git a/llvm/test/CodeGen/AArch64/sve-cmp-folds.ll b/llvm/test/CodeGen/AArch64/sve-cmp-folds.ll
index bc039b57e9645..0d964a488e9ec 100644
--- a/llvm/test/CodeGen/AArch64/sve-cmp-folds.ll
+++ b/llvm/test/CodeGen/AArch64/sve-cmp-folds.ll
@@ -224,9 +224,7 @@ define i1 @whilege_x2_first(i64 %next, i64 %end) {
 ; CHECK-LABEL: whilege_x2_first:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilege { p0.s, p1.s }, x0, x1
-; CHECK-NEXT:    mov z0.s, p0/z, #1 // =0x1
-; CHECK-NEXT:    fmov w8, s0
-; CHECK-NEXT:    and w0, w8, #0x1
+; CHECK-NEXT:    cset w0, mi
 ; CHECK-NEXT:    ret
   %predpair = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilege.x2.nxv4i1.i64(i64 %next, i64 %end)
   %predicate = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } %predpair, 0
@@ -238,9 +236,7 @@ define i1 @whilegt_x2_first(i64 %next, i64 %end) {
 ; CHECK-LABEL: whilegt_x2_first:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilegt { p0.s, p1.s }, x0, x1
-; CHECK-NEXT:    mov z0.s, p0/z, #1 // =0x1
-; CHECK-NEXT:    fmov w8, s0
-; CHECK-NEXT:    and w0, w8, #0x1
+; CHECK-NEXT:    cset w0, mi
 ; CHECK-NEXT:    ret
   %predpair = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilegt.x2.nxv4i1.i64(i64 %next, i64 %end)
   %predicate = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } %predpair, 0
@@ -252,9 +248,7 @@ define i1 @whilehi_x2_first(i64 %next, i64 %end) {
 ; CHECK-LABEL: whilehi_x2_first:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilehi { p0.s, p1.s }, x0, x1
-; CHECK-NEXT:    mov z0.s, p0/z, #1 // =0x1
-; CHECK-NEXT:    fmov w8, s0
-; CHECK-NEXT:    and w0, w8, #0x1
+; CHECK-NEXT:    cset w0, mi
 ; CHECK-NEXT:    ret
   %predpair = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilehi.x2.nxv4i1.i64(i64 %next, i64 %end)
   %predicate = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } %predpair, 0
@@ -266,9 +260,7 @@ define i1 @whilehs_x2_first(i64 %next, i64 %end) {
 ; CHECK-LABEL: whilehs_x2_first:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilehs { p0.s, p1.s }, x0, x1
-; CHECK-NEXT:    mov z0.s, p0/z, #1 // =0x1
-; CHECK-NEXT:    fmov w8, s0
-; CHECK-NEXT:    and w0, w8, #0x1
+; CHECK-NEXT:    cset w0, mi
 ; CHECK-NEXT:    ret
   %predpair = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilehs.x2.nxv4i1.i64(i64 %next, i64 %end)
   %predicate = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } %predpair, 0
@@ -280,9 +272,7 @@ define i1 @whilele_x2_first(i64 %next, i64 %end) {
 ; CHECK-LABEL: whilele_x2_first:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilele { p0.s, p1.s }, x0, x1
-; CHECK-NEXT:    mov z0.s, p0/z, #1 // =0x1
-; CHECK-NEXT:    fmov w8, s0
-; CHECK-NEXT:    and w0, w8, #0x1
+; CHECK-NEXT:    cset w0, mi
 ; CHECK-NEXT:    ret
   %predpair = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilele.x2.nxv4i1.i64(i64 %next, i64 %end)
   %predicate = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } %predpair, 0
@@ -294,9 +284,7 @@ define i1 @whilelo_x2_first(i64 %next, i64 %end) {
 ; CHECK-LABEL: whilelo_x2_first:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilelo { p0.s, p1.s }, x0, x1
-; CHECK-NEXT:    mov z0.s, p0/z, #1 // =0x1
-; CHECK-NEXT:    fmov w8, s0
-; CHECK-NEXT:    and w0, w8, #0x1
+; CHECK-NEXT:    cset w0, mi
 ; CHECK-NEXT:    ret
   %predpair = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilelo.x2.nxv4i1.i64(i64 %next, i64 %end)
   %predicate = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } %predpair, 0
@@ -308,9 +296,7 @@ define i1 @whilels_x2_first(i64 %next, i64 %end) {
 ; CHECK-LABEL: whilels_x2_first:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilels { p0.s, p1.s }, x0, x1
-; CHECK-NEXT:    mov z0.s, p0/z, #1 // =0x1
-; CHECK-NEXT:    fmov w8, s0
-; CHECK-NEXT:    and w0, w8, #0x1
+; CHECK-NEXT:    cset w0, mi
 ; CHECK-NEXT:    ret
   %predpair = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilels.x2.nxv4i1.i64(i64 %next, i64 %end)
   %predicate = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } %predpair, 0
@@ -322,9 +308,7 @@ define i1 @whilelt_x2_first(i64 %next, i64 %end) {
 ; CHECK-LABEL: whilelt_x2_first:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilelt { p0.s, p1.s }, x0, x1
-; CHECK-NEXT:    mov z0.s, p0/z, #1 // =0x1
-; CHECK-NEXT:    fmov w8, s0
-; CHECK-NEXT:    and w0, w8, #0x1
+; CHECK-NEXT:    cset w0, mi
 ; CHECK-NEXT:    ret
   %predpair = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilelt.x2.nxv4i1.i64(i64 %next, i64 %end)
   %predicate = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } %predpair, 0

>From 2eaccb6f537bf916d173f37d43a49e409748f15e Mon Sep 17 00:00:00 2001
From: Kerry McLaughlin <kerry.mclaughlin at arm.com>
Date: Thu, 4 Sep 2025 14:01:53 +0000
Subject: [PATCH 6/8] - Ensure both performFirstTrueTestVectorCombine &
 optimizePTestInstr only consider extracts/copies from the first result of
 whilecc_x2 - Add negative tests

---
 .../Target/AArch64/AArch64ISelLowering.cpp    |  2 +-
 llvm/lib/Target/AArch64/AArch64InstrInfo.cpp  | 27 ++++------
 llvm/test/CodeGen/AArch64/sve-cmp-folds.ll    | 15 ++++++
 .../AArch64/sve-ptest-removal-whilege.mir     | 51 +++++++++++++++++++
 .../AArch64/sve-ptest-removal-whilegt.mir     | 51 +++++++++++++++++++
 .../AArch64/sve-ptest-removal-whilehi.mir     | 51 +++++++++++++++++++
 .../AArch64/sve-ptest-removal-whilehs.mir     | 51 +++++++++++++++++++
 .../AArch64/sve-ptest-removal-whilele.mir     | 51 +++++++++++++++++++
 .../AArch64/sve-ptest-removal-whilelo.mir     | 51 +++++++++++++++++++
 .../AArch64/sve-ptest-removal-whilels.mir     | 51 +++++++++++++++++++
 .../AArch64/sve-ptest-removal-whilelt.mir     | 51 +++++++++++++++++++
 11 files changed, 435 insertions(+), 17 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 1c4443e35e3a8..a194147d09396 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -20235,7 +20235,7 @@ performFirstTrueTestVectorCombine(SDNode *N,
 
   // Restricted the DAG combine to only cases where we're extracting from a
   // flag-setting operation.
-  if (!isPredicateCCSettingOp(N0))
+  if (!isPredicateCCSettingOp(N0) || N0.getResNo() != 0)
     return SDValue();
 
   // Extracts of lane 0 for SVE can be expressed as PTEST(Op, FIRST) ? 1 : 0
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 521766af0af25..0a3d517d2adaf 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -1488,21 +1488,6 @@ AArch64InstrInfo::canRemovePTestInstr(MachineInstr *PTest, MachineInstr *Mask,
   bool PredIsPTestLike = isPTestLikeOpcode(PredOpcode);
   bool PredIsWhileLike = isWhileOpcode(PredOpcode);
 
-  uint64_t PredEltSize = 0;
-  if (PredIsWhileLike)
-    PredEltSize = getElementSizeForOpcode(PredOpcode);
-
-  if (Pred->isCopy()) {
-    // Instructions which return a multi-vector (e.g. WHILECC_x2) require copies
-    // before the branch to extract each subregister.
-    auto Op = Pred->getOperand(1);
-    if (Op.isReg() && Op.getReg().isVirtual() && Op.getSubReg() != 0) {
-      MachineInstr *DefMI = MRI->getVRegDef(Op.getReg());
-      PredIsWhileLike = isWhileOpcode(DefMI->getOpcode());
-      PredEltSize = getElementSizeForOpcode(DefMI->getOpcode());
-    }
-  }
-
   if (PredIsWhileLike) {
     // For PTEST(PG, PG), PTEST is redundant when PG is the result of a WHILEcc
     // instruction and the condition is "any" since WHILcc does an implicit
@@ -1514,7 +1499,8 @@ AArch64InstrInfo::canRemovePTestInstr(MachineInstr *PTest, MachineInstr *Mask,
     // redundant since WHILE performs an implicit PTEST with an all active
     // mask.
     if (isPTrueOpcode(MaskOpcode) && Mask->getOperand(1).getImm() == 31 &&
-        getElementSizeForOpcode(MaskOpcode) == PredEltSize)
+        getElementSizeForOpcode(MaskOpcode) ==
+        getElementSizeForOpcode(PredOpcode))
       return PredOpcode;
 
     return {};
@@ -1627,6 +1613,15 @@ bool AArch64InstrInfo::optimizePTestInstr(
     const MachineRegisterInfo *MRI) const {
   auto *Mask = MRI->getUniqueVRegDef(MaskReg);
   auto *Pred = MRI->getUniqueVRegDef(PredReg);
+
+  if (Pred->isCopy()) {
+    // Instructions which return a multi-vector (e.g. WHILECC_x2) require copies
+    // before the branch to extract each subregister.
+    auto Op = Pred->getOperand(1);
+    if (Op.isReg() && Op.getSubReg() == AArch64::psub0)
+      Pred = MRI->getUniqueVRegDef(Op.getReg());
+  }
+
   unsigned PredOpcode = Pred->getOpcode();
   auto NewOp = canRemovePTestInstr(PTest, Mask, Pred, MRI);
   if (!NewOp)
diff --git a/llvm/test/CodeGen/AArch64/sve-cmp-folds.ll b/llvm/test/CodeGen/AArch64/sve-cmp-folds.ll
index 0d964a488e9ec..ef88f0f918e64 100644
--- a/llvm/test/CodeGen/AArch64/sve-cmp-folds.ll
+++ b/llvm/test/CodeGen/AArch64/sve-cmp-folds.ll
@@ -316,6 +316,21 @@ define i1 @whilelt_x2_first(i64 %next, i64 %end) {
   ret i1 %bit
 }
 
+; Do not combine to ptest when the extract is not from the first vector result
+define i1 @whilege_x2_second_result(i64 %next, i64 %end) {
+; CHECK-LABEL: whilege_x2_second_result:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    whilege { p0.s, p1.s }, x0, x1
+; CHECK-NEXT:    mov z0.s, p1/z, #1 // =0x1
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    and w0, w8, #0x1
+; CHECK-NEXT:    ret
+  %predpair = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilege.x2.nxv4i1.i64(i64 %next, i64 %end)
+  %predicate = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } %predpair, 1
+  %bit = extractelement <vscale x 4 x i1> %predicate, i64 0
+  ret i1 %bit
+}
+
 declare i64 @llvm.vscale.i64()
 declare <vscale x 4 x i1> @llvm.aarch64.sve.whilege.nxv4i1.i64(i64, i64)
 declare <vscale x 4 x i1> @llvm.aarch64.sve.whilegt.nxv4i1.i64(i64, i64)
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilege.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilege.mir
index d3a1be3de17fb..5f752583bc91b 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilege.mir
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilege.mir
@@ -587,3 +587,54 @@ body:             |
     $w0 = COPY %7
     RET_ReallyLR implicit $w0
 ...
+
+# PTEST is not redundant when its Pg operand is a subregister copy, but not
+# from the first subregister of ppr2mul2
+---
+name:            whilege_x2_b64_s64_psub1
+alignment:       2
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: gpr64 }
+  - { id: 1, class: gpr64 }
+  - { id: 2, class: ppr }
+  - { id: 3, class: ppr2mul2 }
+  - { id: 4, class: ppr }
+  - { id: 5, class: ppr }
+  - { id: 6, class: gpr32 }
+  - { id: 7, class: gpr32 }
+liveins:
+  - { reg: '$x0', virtual-reg: '%0' }
+  - { reg: '$x1', virtual-reg: '%1' }
+frameInfo:
+  maxCallFrameSize: 0
+body:             |
+  bb.0.entry:
+    liveins: $x0, $x1
+
+    ; CHECK-LABEL: name: whilege_x2_b64_s64_psub1
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+    ; CHECK-NEXT: [[PTRUE_D:%[0-9]+]]:ppr = PTRUE_D 31, implicit $vg
+    ; CHECK-NEXT: [[WHILEGE_2PXX_D:%[0-9]+]]:ppr2mul2 = WHILEGE_2PXX_D [[COPY]], [[COPY1]], implicit-def $nzcv
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ppr = COPY [[WHILEGE_2PXX_D]].psub0
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ppr = COPY [[WHILEGE_2PXX_D]].psub1
+    ; CHECK-NEXT: PTEST_PP killed [[PTRUE_D]], killed [[COPY3]], implicit-def $nzcv
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr
+    ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr [[COPY4]], $wzr, 0, implicit $nzcv
+    ; CHECK-NEXT: $w0 = COPY [[CSINCWr]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %0:gpr64 = COPY $x0
+    %1:gpr64 = COPY $x1
+    %2:ppr = PTRUE_D 31, implicit $vg
+    %3:ppr2mul2 = WHILEGE_2PXX_D %0, %1, implicit-def $nzcv
+    %4:ppr = COPY %3.psub0
+    %5:ppr = COPY %3.psub1
+    PTEST_PP killed %2, killed %5, implicit-def $nzcv
+    %6:gpr32 = COPY $wzr
+    %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
+    $w0 = COPY %7
+    RET_ReallyLR implicit $w0
+...
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilegt.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilegt.mir
index fb92955f02d52..b49941f90261e 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilegt.mir
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilegt.mir
@@ -627,3 +627,54 @@ body:             |
     $w0 = COPY %7
     RET_ReallyLR implicit $w0
 ...
+
+# PTEST is not redundant when its Pg operand is a subregister copy, but not
+# from the first subregister of ppr2mul2
+---
+name:            whilegt_x2_b64_s64_psub1
+alignment:       2
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: gpr64 }
+  - { id: 1, class: gpr64 }
+  - { id: 2, class: ppr }
+  - { id: 3, class: ppr2mul2 }
+  - { id: 4, class: ppr }
+  - { id: 5, class: ppr }
+  - { id: 6, class: gpr32 }
+  - { id: 7, class: gpr32 }
+liveins:
+  - { reg: '$x0', virtual-reg: '%0' }
+  - { reg: '$x1', virtual-reg: '%1' }
+frameInfo:
+  maxCallFrameSize: 0
+body:             |
+  bb.0.entry:
+    liveins: $x0, $x1
+
+    ; CHECK-LABEL: name: whilegt_x2_b64_s64_psub1
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+    ; CHECK-NEXT: [[PTRUE_D:%[0-9]+]]:ppr = PTRUE_D 31, implicit $vg
+    ; CHECK-NEXT: [[WHILEGT_2PXX_D:%[0-9]+]]:ppr2mul2 = WHILEGT_2PXX_D [[COPY]], [[COPY1]], implicit-def $nzcv
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ppr = COPY [[WHILEGT_2PXX_D]].psub0
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ppr = COPY [[WHILEGT_2PXX_D]].psub1
+    ; CHECK-NEXT: PTEST_PP killed [[PTRUE_D]], killed [[COPY3]], implicit-def $nzcv
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr
+    ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr [[COPY4]], $wzr, 0, implicit $nzcv
+    ; CHECK-NEXT: $w0 = COPY [[CSINCWr]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %0:gpr64 = COPY $x0
+    %1:gpr64 = COPY $x1
+    %2:ppr = PTRUE_D 31, implicit $vg
+    %3:ppr2mul2 = WHILEGT_2PXX_D %0, %1, implicit-def $nzcv
+    %4:ppr = COPY %3.psub0
+    %5:ppr = COPY %3.psub1
+    PTEST_PP killed %2, killed %5, implicit-def $nzcv
+    %6:gpr32 = COPY $wzr
+    %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
+    $w0 = COPY %7
+    RET_ReallyLR implicit $w0
+...
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilehi.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilehi.mir
index 97f242b852eb8..5ad712669df5b 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilehi.mir
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilehi.mir
@@ -587,3 +587,54 @@ body:             |
     $w0 = COPY %7
     RET_ReallyLR implicit $w0
 ...
+
+# PTEST is not redundant when its Pg operand is a subregister copy, but not
+# from the first subregister of ppr2mul2
+---
+name:            whilehi_x2_b64_s64_psub1
+alignment:       2
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: gpr64 }
+  - { id: 1, class: gpr64 }
+  - { id: 2, class: ppr }
+  - { id: 3, class: ppr2mul2 }
+  - { id: 4, class: ppr }
+  - { id: 5, class: ppr }
+  - { id: 6, class: gpr32 }
+  - { id: 7, class: gpr32 }
+liveins:
+  - { reg: '$x0', virtual-reg: '%0' }
+  - { reg: '$x1', virtual-reg: '%1' }
+frameInfo:
+  maxCallFrameSize: 0
+body:             |
+  bb.0.entry:
+    liveins: $x0, $x1
+
+    ; CHECK-LABEL: name: whilehi_x2_b64_s64_psub1
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+    ; CHECK-NEXT: [[PTRUE_D:%[0-9]+]]:ppr = PTRUE_D 31, implicit $vg
+    ; CHECK-NEXT: [[WHILEHI_2PXX_D:%[0-9]+]]:ppr2mul2 = WHILEHI_2PXX_D [[COPY]], [[COPY1]], implicit-def $nzcv
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ppr = COPY [[WHILEHI_2PXX_D]].psub0
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ppr = COPY [[WHILEHI_2PXX_D]].psub1
+    ; CHECK-NEXT: PTEST_PP killed [[PTRUE_D]], killed [[COPY3]], implicit-def $nzcv
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr
+    ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr [[COPY4]], $wzr, 0, implicit $nzcv
+    ; CHECK-NEXT: $w0 = COPY [[CSINCWr]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %0:gpr64 = COPY $x0
+    %1:gpr64 = COPY $x1
+    %2:ppr = PTRUE_D 31, implicit $vg
+    %3:ppr2mul2 = WHILEHI_2PXX_D %0, %1, implicit-def $nzcv
+    %4:ppr = COPY %3.psub0
+    %5:ppr = COPY %3.psub1
+    PTEST_PP killed %2, killed %5, implicit-def $nzcv
+    %6:gpr32 = COPY $wzr
+    %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
+    $w0 = COPY %7
+    RET_ReallyLR implicit $w0
+...
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilehs.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilehs.mir
index 0ec4788957335..35615ab85ff47 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilehs.mir
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilehs.mir
@@ -587,3 +587,54 @@ body:             |
     $w0 = COPY %7
     RET_ReallyLR implicit $w0
 ...
+
+# PTEST is not redundant when its Pg operand is a subregister copy, but not
+# from the first subregister of ppr2mul2
+---
+name:            whilehs_x2_b64_s64_psub1
+alignment:       2
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: gpr64 }
+  - { id: 1, class: gpr64 }
+  - { id: 2, class: ppr }
+  - { id: 3, class: ppr2mul2 }
+  - { id: 4, class: ppr }
+  - { id: 5, class: ppr }
+  - { id: 6, class: gpr32 }
+  - { id: 7, class: gpr32 }
+liveins:
+  - { reg: '$x0', virtual-reg: '%0' }
+  - { reg: '$x1', virtual-reg: '%1' }
+frameInfo:
+  maxCallFrameSize: 0
+body:             |
+  bb.0.entry:
+    liveins: $x0, $x1
+
+    ; CHECK-LABEL: name: whilehs_x2_b64_s64_psub1
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+    ; CHECK-NEXT: [[PTRUE_D:%[0-9]+]]:ppr = PTRUE_D 31, implicit $vg
+    ; CHECK-NEXT: [[WHILEHS_2PXX_D:%[0-9]+]]:ppr2mul2 = WHILEHS_2PXX_D [[COPY]], [[COPY1]], implicit-def $nzcv
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ppr = COPY [[WHILEHS_2PXX_D]].psub0
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ppr = COPY [[WHILEHS_2PXX_D]].psub1
+    ; CHECK-NEXT: PTEST_PP killed [[PTRUE_D]], killed [[COPY3]], implicit-def $nzcv
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr
+    ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr [[COPY4]], $wzr, 0, implicit $nzcv
+    ; CHECK-NEXT: $w0 = COPY [[CSINCWr]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %0:gpr64 = COPY $x0
+    %1:gpr64 = COPY $x1
+    %2:ppr = PTRUE_D 31, implicit $vg
+    %3:ppr2mul2 = WHILEHS_2PXX_D %0, %1, implicit-def $nzcv
+    %4:ppr = COPY %3.psub0
+    %5:ppr = COPY %3.psub1
+    PTEST_PP killed %2, killed %5, implicit-def $nzcv
+    %6:gpr32 = COPY $wzr
+    %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
+    $w0 = COPY %7
+    RET_ReallyLR implicit $w0
+...
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilele.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilele.mir
index a4cb32bdea624..65521f0352366 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilele.mir
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilele.mir
@@ -587,3 +587,54 @@ body:             |
     $w0 = COPY %7
     RET_ReallyLR implicit $w0
 ...
+
+# PTEST is not redundant when its Pg operand is a subregister copy, but not
+# from the first subregister of ppr2mul2
+---
+name:            whilele_x2_b64_s64_psub1
+alignment:       2
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: gpr64 }
+  - { id: 1, class: gpr64 }
+  - { id: 2, class: ppr }
+  - { id: 3, class: ppr2mul2 }
+  - { id: 4, class: ppr }
+  - { id: 5, class: ppr }
+  - { id: 6, class: gpr32 }
+  - { id: 7, class: gpr32 }
+liveins:
+  - { reg: '$x0', virtual-reg: '%0' }
+  - { reg: '$x1', virtual-reg: '%1' }
+frameInfo:
+  maxCallFrameSize: 0
+body:             |
+  bb.0.entry:
+    liveins: $x0, $x1
+
+    ; CHECK-LABEL: name: whilele_x2_b64_s64_psub1
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+    ; CHECK-NEXT: [[PTRUE_D:%[0-9]+]]:ppr = PTRUE_D 31, implicit $vg
+    ; CHECK-NEXT: [[WHILELE_2PXX_D:%[0-9]+]]:ppr2mul2 = WHILELE_2PXX_D [[COPY]], [[COPY1]], implicit-def $nzcv
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ppr = COPY [[WHILELE_2PXX_D]].psub0
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ppr = COPY [[WHILELE_2PXX_D]].psub1
+    ; CHECK-NEXT: PTEST_PP killed [[PTRUE_D]], killed [[COPY3]], implicit-def $nzcv
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr
+    ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr [[COPY4]], $wzr, 0, implicit $nzcv
+    ; CHECK-NEXT: $w0 = COPY [[CSINCWr]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %0:gpr64 = COPY $x0
+    %1:gpr64 = COPY $x1
+    %2:ppr = PTRUE_D 31, implicit $vg
+    %3:ppr2mul2 = WHILELE_2PXX_D %0, %1, implicit-def $nzcv
+    %4:ppr = COPY %3.psub0
+    %5:ppr = COPY %3.psub1
+    PTEST_PP killed %2, killed %5, implicit-def $nzcv
+    %6:gpr32 = COPY $wzr
+    %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
+    $w0 = COPY %7
+    RET_ReallyLR implicit $w0
+...
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilelo.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilelo.mir
index fdddf50832bb3..835abf4f6bb9b 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilelo.mir
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilelo.mir
@@ -587,3 +587,54 @@ body:             |
     $w0 = COPY %7
     RET_ReallyLR implicit $w0
 ...
+
+# PTEST is not redundant when its Pn operand is a subregister copy, but not
+# from the first subregister of ppr2mul2
+---
+name:            whilelo_x2_b64_s64_psub1
+alignment:       2
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: gpr64 }
+  - { id: 1, class: gpr64 }
+  - { id: 2, class: ppr }
+  - { id: 3, class: ppr2mul2 }
+  - { id: 4, class: ppr }
+  - { id: 5, class: ppr }
+  - { id: 6, class: gpr32 }
+  - { id: 7, class: gpr32 }
+liveins:
+  - { reg: '$x0', virtual-reg: '%0' }
+  - { reg: '$x1', virtual-reg: '%1' }
+frameInfo:
+  maxCallFrameSize: 0
+body:             |
+  bb.0.entry:
+    liveins: $x0, $x1
+
+    ; CHECK-LABEL: name: whilelo_x2_b64_s64_psub1
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+    ; CHECK-NEXT: [[PTRUE_D:%[0-9]+]]:ppr = PTRUE_D 31, implicit $vg
+    ; CHECK-NEXT: [[WHILELO_2PXX_D:%[0-9]+]]:ppr2mul2 = WHILELO_2PXX_D [[COPY]], [[COPY1]], implicit-def $nzcv
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ppr = COPY [[WHILELO_2PXX_D]].psub0
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ppr = COPY [[WHILELO_2PXX_D]].psub1
+    ; CHECK-NEXT: PTEST_PP killed [[PTRUE_D]], killed [[COPY3]], implicit-def $nzcv
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr
+    ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr [[COPY4]], $wzr, 0, implicit $nzcv
+    ; CHECK-NEXT: $w0 = COPY [[CSINCWr]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %0:gpr64 = COPY $x0
+    %1:gpr64 = COPY $x1
+    %2:ppr = PTRUE_D 31, implicit $vg
+    %3:ppr2mul2 = WHILELO_2PXX_D %0, %1, implicit-def $nzcv
+    %4:ppr = COPY %3.psub0
+    %5:ppr = COPY %3.psub1
+    PTEST_PP killed %2, killed %5, implicit-def $nzcv
+    %6:gpr32 = COPY $wzr
+    %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
+    $w0 = COPY %7
+    RET_ReallyLR implicit $w0
+...
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilels.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilels.mir
index 26297e36dceda..8a4764e629132 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilels.mir
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilels.mir
@@ -587,3 +587,54 @@ body:             |
     $w0 = COPY %7
     RET_ReallyLR implicit $w0
 ...
+
+# PTEST is not redundant when it's Pg operand is a subregister copy, but not
+# from the first subregister of ppr2mul2
+---
+name:            whilels_x2_b64_s64_psub1
+alignment:       2
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: gpr64 }
+  - { id: 1, class: gpr64 }
+  - { id: 2, class: ppr }
+  - { id: 3, class: ppr2mul2 }
+  - { id: 4, class: ppr }
+  - { id: 5, class: ppr }
+  - { id: 6, class: gpr32 }
+  - { id: 7, class: gpr32 }
+liveins:
+  - { reg: '$x0', virtual-reg: '%0' }
+  - { reg: '$x1', virtual-reg: '%1' }
+frameInfo:
+  maxCallFrameSize: 0
+body:             |
+  bb.0.entry:
+    liveins: $x0, $x1
+
+    ; CHECK-LABEL: name: whilels_x2_b64_s64_psub1
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+    ; CHECK-NEXT: [[PTRUE_D:%[0-9]+]]:ppr = PTRUE_D 31, implicit $vg
+    ; CHECK-NEXT: [[WHILELS_2PXX_D:%[0-9]+]]:ppr2mul2 = WHILELS_2PXX_D [[COPY]], [[COPY1]], implicit-def $nzcv
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ppr = COPY [[WHILELS_2PXX_D]].psub0
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ppr = COPY [[WHILELS_2PXX_D]].psub1
+    ; CHECK-NEXT: PTEST_PP killed [[PTRUE_D]], killed [[COPY3]], implicit-def $nzcv
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr
+    ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr [[COPY4]], $wzr, 0, implicit $nzcv
+    ; CHECK-NEXT: $w0 = COPY [[CSINCWr]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %0:gpr64 = COPY $x0
+    %1:gpr64 = COPY $x1
+    %2:ppr = PTRUE_D 31, implicit $vg
+    %3:ppr2mul2 = WHILELS_2PXX_D %0, %1, implicit-def $nzcv
+    %4:ppr = COPY %3.psub0
+    %5:ppr = COPY %3.psub1
+    PTEST_PP killed %2, killed %5, implicit-def $nzcv
+    %6:gpr32 = COPY $wzr
+    %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
+    $w0 = COPY %7
+    RET_ReallyLR implicit $w0
+...
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilelt.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilelt.mir
index c3bde639702ed..b66c2599d71aa 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilelt.mir
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilelt.mir
@@ -587,3 +587,54 @@ body:             |
     $w0 = COPY %7
     RET_ReallyLR implicit $w0
 ...
+
+# PTEST is not redundant when it's Pg operand is a subregister copy, but not
+# from the first subregister of ppr2mul2
+---
+name:            whilelt_x2_b64_s64_psub1
+alignment:       2
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: gpr64 }
+  - { id: 1, class: gpr64 }
+  - { id: 2, class: ppr }
+  - { id: 3, class: ppr2mul2 }
+  - { id: 4, class: ppr }
+  - { id: 5, class: ppr }
+  - { id: 6, class: gpr32 }
+  - { id: 7, class: gpr32 }
+liveins:
+  - { reg: '$x0', virtual-reg: '%0' }
+  - { reg: '$x1', virtual-reg: '%1' }
+frameInfo:
+  maxCallFrameSize: 0
+body:             |
+  bb.0.entry:
+    liveins: $x0, $x1
+
+    ; CHECK-LABEL: name: whilelt_x2_b64_s64_psub1
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+    ; CHECK-NEXT: [[PTRUE_D:%[0-9]+]]:ppr = PTRUE_D 31, implicit $vg
+    ; CHECK-NEXT: [[WHILELT_2PXX_D:%[0-9]+]]:ppr2mul2 = WHILELT_2PXX_D [[COPY]], [[COPY1]], implicit-def $nzcv
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ppr = COPY [[WHILELT_2PXX_D]].psub0
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ppr = COPY [[WHILELT_2PXX_D]].psub1
+    ; CHECK-NEXT: PTEST_PP killed [[PTRUE_D]], killed [[COPY3]], implicit-def $nzcv
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr
+    ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr [[COPY4]], $wzr, 0, implicit $nzcv
+    ; CHECK-NEXT: $w0 = COPY [[CSINCWr]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %0:gpr64 = COPY $x0
+    %1:gpr64 = COPY $x1
+    %2:ppr = PTRUE_D 31, implicit $vg
+    %3:ppr2mul2 = WHILELT_2PXX_D %0, %1, implicit-def $nzcv
+    %4:ppr = COPY %3.psub0
+    %5:ppr = COPY %3.psub1
+    PTEST_PP killed %2, killed %5, implicit-def $nzcv
+    %6:gpr32 = COPY $wzr
+    %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
+    $w0 = COPY %7
+    RET_ReallyLR implicit $w0
+...

>From e73955b6d0192c934e6815a0503fe9a4f3e2692e Mon Sep 17 00:00:00 2001
From: Kerry McLaughlin <kerry.mclaughlin at arm.com>
Date: Fri, 5 Sep 2025 09:04:03 +0000
Subject: [PATCH 7/8] - Run clang-format

---
 llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 0a3d517d2adaf..455d0a7b68109 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -1500,7 +1500,7 @@ AArch64InstrInfo::canRemovePTestInstr(MachineInstr *PTest, MachineInstr *Mask,
     // mask.
     if (isPTrueOpcode(MaskOpcode) && Mask->getOperand(1).getImm() == 31 &&
         getElementSizeForOpcode(MaskOpcode) ==
-        getElementSizeForOpcode(PredOpcode))
+            getElementSizeForOpcode(PredOpcode))
       return PredOpcode;
 
     return {};

>From bc7b642ad7d97961fc442f3326c432e0bf85c041 Mon Sep 17 00:00:00 2001
From: Kerry McLaughlin <kerry.mclaughlin at arm.com>
Date: Mon, 8 Sep 2025 14:21:03 +0000
Subject: [PATCH 8/8] - Only consider removing PTest if opcode is
 PTEST_PP_FIRST - Remove redundant mir tests

---
 llvm/lib/Target/AArch64/AArch64InstrInfo.cpp  |   2 +-
 .../AArch64/sve-ptest-removal-whilege.mir     | 100 ------------------
 .../AArch64/sve-ptest-removal-whilegt.mir     | 100 ------------------
 .../AArch64/sve-ptest-removal-whilehi.mir     | 100 ------------------
 .../AArch64/sve-ptest-removal-whilehs.mir     | 100 ------------------
 .../AArch64/sve-ptest-removal-whilele.mir     | 100 ------------------
 .../AArch64/sve-ptest-removal-whilelo.mir     |  57 +++++++++-
 .../AArch64/sve-ptest-removal-whilels.mir     | 100 ------------------
 .../AArch64/sve-ptest-removal-whilelt.mir     | 100 ------------------
 9 files changed, 55 insertions(+), 704 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 455d0a7b68109..0f2a8f9fec63d 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -1614,7 +1614,7 @@ bool AArch64InstrInfo::optimizePTestInstr(
   auto *Mask = MRI->getUniqueVRegDef(MaskReg);
   auto *Pred = MRI->getUniqueVRegDef(PredReg);
 
-  if (Pred->isCopy()) {
+  if (Pred->isCopy() && PTest->getOpcode() == AArch64::PTEST_PP_FIRST) {
     // Instructions which return a multi-vector (e.g. WHILECC_x2) require copies
     // before the branch to extract each subregister.
     auto Op = Pred->getOperand(1);
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilege.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilege.mir
index 5f752583bc91b..69a2c88d7dbad 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilege.mir
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilege.mir
@@ -538,103 +538,3 @@ body:             |
     RET_ReallyLR implicit $w0
 
 ...
-
-# WHILEGE (predicate pair)
----
-name:            whilege_x2_b64_s64
-alignment:       2
-tracksRegLiveness: true
-registers:
-  - { id: 0, class: gpr64 }
-  - { id: 1, class: gpr64 }
-  - { id: 2, class: ppr }
-  - { id: 3, class: ppr2mul2 }
-  - { id: 4, class: ppr }
-  - { id: 5, class: ppr }
-  - { id: 6, class: gpr32 }
-  - { id: 7, class: gpr32 }
-liveins:
-  - { reg: '$x0', virtual-reg: '%0' }
-  - { reg: '$x1', virtual-reg: '%1' }
-frameInfo:
-  maxCallFrameSize: 0
-body:             |
-  bb.0.entry:
-    liveins: $x0, $x1
-
-    ; CHECK-LABEL: name: whilege_x2_b64_s64
-    ; CHECK: liveins: $x0, $x1
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
-    ; CHECK-NEXT: [[PTRUE_D:%[0-9]+]]:ppr = PTRUE_D 31, implicit $vg
-    ; CHECK-NEXT: [[WHILEGE_2PXX_D:%[0-9]+]]:ppr2mul2 = WHILEGE_2PXX_D [[COPY]], [[COPY1]], implicit-def $nzcv
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ppr = COPY [[WHILEGE_2PXX_D]].psub0
-    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ppr = COPY [[WHILEGE_2PXX_D]].psub1
-    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr
-    ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr [[COPY4]], $wzr, 0, implicit $nzcv
-    ; CHECK-NEXT: $w0 = COPY [[CSINCWr]]
-    ; CHECK-NEXT: RET_ReallyLR implicit $w0
-    %0:gpr64 = COPY $x0
-    %1:gpr64 = COPY $x1
-    %2:ppr = PTRUE_D 31, implicit $vg
-    %3:ppr2mul2 = WHILEGE_2PXX_D %0, %1, implicit-def $nzcv
-    %4:ppr = COPY %3.psub0
-    %5:ppr = COPY %3.psub1
-    PTEST_PP killed %2, killed %4, implicit-def $nzcv
-    %6:gpr32 = COPY $wzr
-    %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
-    $w0 = COPY %7
-    RET_ReallyLR implicit $w0
-...
-
-# PTEST is not redundant when it's Pg operand is a subregister copy, but not
-# from the first subregister of ppr2mul2
----
-name:            whilege_x2_b64_s64_psub1
-alignment:       2
-tracksRegLiveness: true
-registers:
-  - { id: 0, class: gpr64 }
-  - { id: 1, class: gpr64 }
-  - { id: 2, class: ppr }
-  - { id: 3, class: ppr2mul2 }
-  - { id: 4, class: ppr }
-  - { id: 5, class: ppr }
-  - { id: 6, class: gpr32 }
-  - { id: 7, class: gpr32 }
-liveins:
-  - { reg: '$x0', virtual-reg: '%0' }
-  - { reg: '$x1', virtual-reg: '%1' }
-frameInfo:
-  maxCallFrameSize: 0
-body:             |
-  bb.0.entry:
-    liveins: $x0, $x1
-
-    ; CHECK-LABEL: name: whilege_x2_b64_s64_psub1
-    ; CHECK: liveins: $x0, $x1
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
-    ; CHECK-NEXT: [[PTRUE_D:%[0-9]+]]:ppr = PTRUE_D 31, implicit $vg
-    ; CHECK-NEXT: [[WHILEGE_2PXX_D:%[0-9]+]]:ppr2mul2 = WHILEGE_2PXX_D [[COPY]], [[COPY1]], implicit-def $nzcv
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ppr = COPY [[WHILEGE_2PXX_D]].psub0
-    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ppr = COPY [[WHILEGE_2PXX_D]].psub1
-    ; CHECK-NEXT: PTEST_PP killed [[PTRUE_D]], killed [[COPY3]], implicit-def $nzcv
-    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr
-    ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr [[COPY4]], $wzr, 0, implicit $nzcv
-    ; CHECK-NEXT: $w0 = COPY [[CSINCWr]]
-    ; CHECK-NEXT: RET_ReallyLR implicit $w0
-    %0:gpr64 = COPY $x0
-    %1:gpr64 = COPY $x1
-    %2:ppr = PTRUE_D 31, implicit $vg
-    %3:ppr2mul2 = WHILEGE_2PXX_D %0, %1, implicit-def $nzcv
-    %4:ppr = COPY %3.psub0
-    %5:ppr = COPY %3.psub1
-    PTEST_PP killed %2, killed %5, implicit-def $nzcv
-    %6:gpr32 = COPY $wzr
-    %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
-    $w0 = COPY %7
-    RET_ReallyLR implicit $w0
-...
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilegt.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilegt.mir
index b49941f90261e..58db85aba80ad 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilegt.mir
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilegt.mir
@@ -578,103 +578,3 @@ body:             |
     RET_ReallyLR implicit $w0
 
 ...
-
-# WHILEGT (predicate pair)
----
-name:            whilegt_x2_b64_s64
-alignment:       2
-tracksRegLiveness: true
-registers:
-  - { id: 0, class: gpr64 }
-  - { id: 1, class: gpr64 }
-  - { id: 2, class: ppr }
-  - { id: 3, class: ppr2mul2 }
-  - { id: 4, class: ppr }
-  - { id: 5, class: ppr }
-  - { id: 6, class: gpr32 }
-  - { id: 7, class: gpr32 }
-liveins:
-  - { reg: '$x0', virtual-reg: '%0' }
-  - { reg: '$x1', virtual-reg: '%1' }
-frameInfo:
-  maxCallFrameSize: 0
-body:             |
-  bb.0.entry:
-    liveins: $x0, $x1
-
-    ; CHECK-LABEL: name: whilegt_x2_b64_s64
-    ; CHECK: liveins: $x0, $x1
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
-    ; CHECK-NEXT: [[PTRUE_D:%[0-9]+]]:ppr = PTRUE_D 31, implicit $vg
-    ; CHECK-NEXT: [[WHILEGT_2PXX_D:%[0-9]+]]:ppr2mul2 = WHILEGT_2PXX_D [[COPY]], [[COPY1]], implicit-def $nzcv
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ppr = COPY [[WHILEGT_2PXX_D]].psub0
-    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ppr = COPY [[WHILEGT_2PXX_D]].psub1
-    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr
-    ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr [[COPY4]], $wzr, 0, implicit $nzcv
-    ; CHECK-NEXT: $w0 = COPY [[CSINCWr]]
-    ; CHECK-NEXT: RET_ReallyLR implicit $w0
-    %0:gpr64 = COPY $x0
-    %1:gpr64 = COPY $x1
-    %2:ppr = PTRUE_D 31, implicit $vg
-    %3:ppr2mul2 = WHILEGT_2PXX_D %0, %1, implicit-def $nzcv
-    %4:ppr = COPY %3.psub0
-    %5:ppr = COPY %3.psub1
-    PTEST_PP killed %2, killed %4, implicit-def $nzcv
-    %6:gpr32 = COPY $wzr
-    %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
-    $w0 = COPY %7
-    RET_ReallyLR implicit $w0
-...
-
-# PTEST is not redundant when it's Pg operand is a subregister copy, but not
-# from the first subregister of ppr2mul2
----
-name:            whilegt_x2_b64_s64_psub1
-alignment:       2
-tracksRegLiveness: true
-registers:
-  - { id: 0, class: gpr64 }
-  - { id: 1, class: gpr64 }
-  - { id: 2, class: ppr }
-  - { id: 3, class: ppr2mul2 }
-  - { id: 4, class: ppr }
-  - { id: 5, class: ppr }
-  - { id: 6, class: gpr32 }
-  - { id: 7, class: gpr32 }
-liveins:
-  - { reg: '$x0', virtual-reg: '%0' }
-  - { reg: '$x1', virtual-reg: '%1' }
-frameInfo:
-  maxCallFrameSize: 0
-body:             |
-  bb.0.entry:
-    liveins: $x0, $x1
-
-    ; CHECK-LABEL: name: whilegt_x2_b64_s64_psub1
-    ; CHECK: liveins: $x0, $x1
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
-    ; CHECK-NEXT: [[PTRUE_D:%[0-9]+]]:ppr = PTRUE_D 31, implicit $vg
-    ; CHECK-NEXT: [[WHILEGT_2PXX_D:%[0-9]+]]:ppr2mul2 = WHILEGT_2PXX_D [[COPY]], [[COPY1]], implicit-def $nzcv
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ppr = COPY [[WHILEGT_2PXX_D]].psub0
-    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ppr = COPY [[WHILEGT_2PXX_D]].psub1
-    ; CHECK-NEXT: PTEST_PP killed [[PTRUE_D]], killed [[COPY3]], implicit-def $nzcv
-    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr
-    ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr [[COPY4]], $wzr, 0, implicit $nzcv
-    ; CHECK-NEXT: $w0 = COPY [[CSINCWr]]
-    ; CHECK-NEXT: RET_ReallyLR implicit $w0
-    %0:gpr64 = COPY $x0
-    %1:gpr64 = COPY $x1
-    %2:ppr = PTRUE_D 31, implicit $vg
-    %3:ppr2mul2 = WHILEGT_2PXX_D %0, %1, implicit-def $nzcv
-    %4:ppr = COPY %3.psub0
-    %5:ppr = COPY %3.psub1
-    PTEST_PP killed %2, killed %5, implicit-def $nzcv
-    %6:gpr32 = COPY $wzr
-    %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
-    $w0 = COPY %7
-    RET_ReallyLR implicit $w0
-...
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilehi.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilehi.mir
index 5ad712669df5b..03d9768258ebc 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilehi.mir
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilehi.mir
@@ -538,103 +538,3 @@ body:             |
     RET_ReallyLR implicit $w0
 
 ...
-
-# WHILEHI (predicate pair)
----
-name:            whilehi_x2_b64_s64
-alignment:       2
-tracksRegLiveness: true
-registers:
-  - { id: 0, class: gpr64 }
-  - { id: 1, class: gpr64 }
-  - { id: 2, class: ppr }
-  - { id: 3, class: ppr2mul2 }
-  - { id: 4, class: ppr }
-  - { id: 5, class: ppr }
-  - { id: 6, class: gpr32 }
-  - { id: 7, class: gpr32 }
-liveins:
-  - { reg: '$x0', virtual-reg: '%0' }
-  - { reg: '$x1', virtual-reg: '%1' }
-frameInfo:
-  maxCallFrameSize: 0
-body:             |
-  bb.0.entry:
-    liveins: $x0, $x1
-
-    ; CHECK-LABEL: name: whilehi_x2_b64_s64
-    ; CHECK: liveins: $x0, $x1
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
-    ; CHECK-NEXT: [[PTRUE_D:%[0-9]+]]:ppr = PTRUE_D 31, implicit $vg
-    ; CHECK-NEXT: [[WHILEHI_2PXX_D:%[0-9]+]]:ppr2mul2 = WHILEHI_2PXX_D [[COPY]], [[COPY1]], implicit-def $nzcv
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ppr = COPY [[WHILEHI_2PXX_D]].psub0
-    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ppr = COPY [[WHILEHI_2PXX_D]].psub1
-    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr
-    ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr [[COPY4]], $wzr, 0, implicit $nzcv
-    ; CHECK-NEXT: $w0 = COPY [[CSINCWr]]
-    ; CHECK-NEXT: RET_ReallyLR implicit $w0
-    %0:gpr64 = COPY $x0
-    %1:gpr64 = COPY $x1
-    %2:ppr = PTRUE_D 31, implicit $vg
-    %3:ppr2mul2 = WHILEHI_2PXX_D %0, %1, implicit-def $nzcv
-    %4:ppr = COPY %3.psub0
-    %5:ppr = COPY %3.psub1
-    PTEST_PP killed %2, killed %4, implicit-def $nzcv
-    %6:gpr32 = COPY $wzr
-    %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
-    $w0 = COPY %7
-    RET_ReallyLR implicit $w0
-...
-
-# PTEST is not redundant when it's Pg operand is a subregister copy, but not
-# from the first subregister of ppr2mul2
----
-name:            whilehi_x2_b64_s64_psub1
-alignment:       2
-tracksRegLiveness: true
-registers:
-  - { id: 0, class: gpr64 }
-  - { id: 1, class: gpr64 }
-  - { id: 2, class: ppr }
-  - { id: 3, class: ppr2mul2 }
-  - { id: 4, class: ppr }
-  - { id: 5, class: ppr }
-  - { id: 6, class: gpr32 }
-  - { id: 7, class: gpr32 }
-liveins:
-  - { reg: '$x0', virtual-reg: '%0' }
-  - { reg: '$x1', virtual-reg: '%1' }
-frameInfo:
-  maxCallFrameSize: 0
-body:             |
-  bb.0.entry:
-    liveins: $x0, $x1
-
-    ; CHECK-LABEL: name: whilehi_x2_b64_s64_psub1
-    ; CHECK: liveins: $x0, $x1
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
-    ; CHECK-NEXT: [[PTRUE_D:%[0-9]+]]:ppr = PTRUE_D 31, implicit $vg
-    ; CHECK-NEXT: [[WHILEHI_2PXX_D:%[0-9]+]]:ppr2mul2 = WHILEHI_2PXX_D [[COPY]], [[COPY1]], implicit-def $nzcv
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ppr = COPY [[WHILEHI_2PXX_D]].psub0
-    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ppr = COPY [[WHILEHI_2PXX_D]].psub1
-    ; CHECK-NEXT: PTEST_PP killed [[PTRUE_D]], killed [[COPY3]], implicit-def $nzcv
-    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr
-    ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr [[COPY4]], $wzr, 0, implicit $nzcv
-    ; CHECK-NEXT: $w0 = COPY [[CSINCWr]]
-    ; CHECK-NEXT: RET_ReallyLR implicit $w0
-    %0:gpr64 = COPY $x0
-    %1:gpr64 = COPY $x1
-    %2:ppr = PTRUE_D 31, implicit $vg
-    %3:ppr2mul2 = WHILEHI_2PXX_D %0, %1, implicit-def $nzcv
-    %4:ppr = COPY %3.psub0
-    %5:ppr = COPY %3.psub1
-    PTEST_PP killed %2, killed %5, implicit-def $nzcv
-    %6:gpr32 = COPY $wzr
-    %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
-    $w0 = COPY %7
-    RET_ReallyLR implicit $w0
-...
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilehs.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilehs.mir
index 35615ab85ff47..68ecd79c8325b 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilehs.mir
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilehs.mir
@@ -538,103 +538,3 @@ body:             |
     RET_ReallyLR implicit $w0
 
 ...
-
-# WHILEHS (predicate pair)
----
-name:            whilehs_x2_b64_s64
-alignment:       2
-tracksRegLiveness: true
-registers:
-  - { id: 0, class: gpr64 }
-  - { id: 1, class: gpr64 }
-  - { id: 2, class: ppr }
-  - { id: 3, class: ppr2mul2 }
-  - { id: 4, class: ppr }
-  - { id: 5, class: ppr }
-  - { id: 6, class: gpr32 }
-  - { id: 7, class: gpr32 }
-liveins:
-  - { reg: '$x0', virtual-reg: '%0' }
-  - { reg: '$x1', virtual-reg: '%1' }
-frameInfo:
-  maxCallFrameSize: 0
-body:             |
-  bb.0.entry:
-    liveins: $x0, $x1
-
-    ; CHECK-LABEL: name: whilehs_x2_b64_s64
-    ; CHECK: liveins: $x0, $x1
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
-    ; CHECK-NEXT: [[PTRUE_D:%[0-9]+]]:ppr = PTRUE_D 31, implicit $vg
-    ; CHECK-NEXT: [[WHILEHS_2PXX_D:%[0-9]+]]:ppr2mul2 = WHILEHS_2PXX_D [[COPY]], [[COPY1]], implicit-def $nzcv
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ppr = COPY [[WHILEHS_2PXX_D]].psub0
-    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ppr = COPY [[WHILEHS_2PXX_D]].psub1
-    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr
-    ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr [[COPY4]], $wzr, 0, implicit $nzcv
-    ; CHECK-NEXT: $w0 = COPY [[CSINCWr]]
-    ; CHECK-NEXT: RET_ReallyLR implicit $w0
-    %0:gpr64 = COPY $x0
-    %1:gpr64 = COPY $x1
-    %2:ppr = PTRUE_D 31, implicit $vg
-    %3:ppr2mul2 = WHILEHS_2PXX_D %0, %1, implicit-def $nzcv
-    %4:ppr = COPY %3.psub0
-    %5:ppr = COPY %3.psub1
-    PTEST_PP killed %2, killed %4, implicit-def $nzcv
-    %6:gpr32 = COPY $wzr
-    %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
-    $w0 = COPY %7
-    RET_ReallyLR implicit $w0
-...
-
-# PTEST is not redundant when it's Pg operand is a subregister copy, but not
-# from the first subregister of ppr2mul2
----
-name:            whilehs_x2_b64_s64_psub1
-alignment:       2
-tracksRegLiveness: true
-registers:
-  - { id: 0, class: gpr64 }
-  - { id: 1, class: gpr64 }
-  - { id: 2, class: ppr }
-  - { id: 3, class: ppr2mul2 }
-  - { id: 4, class: ppr }
-  - { id: 5, class: ppr }
-  - { id: 6, class: gpr32 }
-  - { id: 7, class: gpr32 }
-liveins:
-  - { reg: '$x0', virtual-reg: '%0' }
-  - { reg: '$x1', virtual-reg: '%1' }
-frameInfo:
-  maxCallFrameSize: 0
-body:             |
-  bb.0.entry:
-    liveins: $x0, $x1
-
-    ; CHECK-LABEL: name: whilehs_x2_b64_s64_psub1
-    ; CHECK: liveins: $x0, $x1
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
-    ; CHECK-NEXT: [[PTRUE_D:%[0-9]+]]:ppr = PTRUE_D 31, implicit $vg
-    ; CHECK-NEXT: [[WHILEHS_2PXX_D:%[0-9]+]]:ppr2mul2 = WHILEHS_2PXX_D [[COPY]], [[COPY1]], implicit-def $nzcv
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ppr = COPY [[WHILEHS_2PXX_D]].psub0
-    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ppr = COPY [[WHILEHS_2PXX_D]].psub1
-    ; CHECK-NEXT: PTEST_PP killed [[PTRUE_D]], killed [[COPY3]], implicit-def $nzcv
-    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr
-    ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr [[COPY4]], $wzr, 0, implicit $nzcv
-    ; CHECK-NEXT: $w0 = COPY [[CSINCWr]]
-    ; CHECK-NEXT: RET_ReallyLR implicit $w0
-    %0:gpr64 = COPY $x0
-    %1:gpr64 = COPY $x1
-    %2:ppr = PTRUE_D 31, implicit $vg
-    %3:ppr2mul2 = WHILEHS_2PXX_D %0, %1, implicit-def $nzcv
-    %4:ppr = COPY %3.psub0
-    %5:ppr = COPY %3.psub1
-    PTEST_PP killed %2, killed %5, implicit-def $nzcv
-    %6:gpr32 = COPY $wzr
-    %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
-    $w0 = COPY %7
-    RET_ReallyLR implicit $w0
-...
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilele.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilele.mir
index 65521f0352366..16dcb2cebec7e 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilele.mir
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilele.mir
@@ -538,103 +538,3 @@ body:             |
     RET_ReallyLR implicit $w0
 
 ...
-
-# WHILELE (predicate pair)
----
-name:            whilele_x2_b64_s64
-alignment:       2
-tracksRegLiveness: true
-registers:
-  - { id: 0, class: gpr64 }
-  - { id: 1, class: gpr64 }
-  - { id: 2, class: ppr }
-  - { id: 3, class: ppr2mul2 }
-  - { id: 4, class: ppr }
-  - { id: 5, class: ppr }
-  - { id: 6, class: gpr32 }
-  - { id: 7, class: gpr32 }
-liveins:
-  - { reg: '$x0', virtual-reg: '%0' }
-  - { reg: '$x1', virtual-reg: '%1' }
-frameInfo:
-  maxCallFrameSize: 0
-body:             |
-  bb.0.entry:
-    liveins: $x0, $x1
-
-    ; CHECK-LABEL: name: whilele_x2_b64_s64
-    ; CHECK: liveins: $x0, $x1
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
-    ; CHECK-NEXT: [[PTRUE_D:%[0-9]+]]:ppr = PTRUE_D 31, implicit $vg
-    ; CHECK-NEXT: [[WHILELE_2PXX_D:%[0-9]+]]:ppr2mul2 = WHILELE_2PXX_D [[COPY]], [[COPY1]], implicit-def $nzcv
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ppr = COPY [[WHILELE_2PXX_D]].psub0
-    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ppr = COPY [[WHILELE_2PXX_D]].psub1
-    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr
-    ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr [[COPY4]], $wzr, 0, implicit $nzcv
-    ; CHECK-NEXT: $w0 = COPY [[CSINCWr]]
-    ; CHECK-NEXT: RET_ReallyLR implicit $w0
-    %0:gpr64 = COPY $x0
-    %1:gpr64 = COPY $x1
-    %2:ppr = PTRUE_D 31, implicit $vg
-    %3:ppr2mul2 = WHILELE_2PXX_D %0, %1, implicit-def $nzcv
-    %4:ppr = COPY %3.psub0
-    %5:ppr = COPY %3.psub1
-    PTEST_PP killed %2, killed %4, implicit-def $nzcv
-    %6:gpr32 = COPY $wzr
-    %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
-    $w0 = COPY %7
-    RET_ReallyLR implicit $w0
-...
-
-# PTEST is not redundant when it's Pg operand is a subregister copy, but not
-# from the first subregister of ppr2mul2
----
-name:            whilele_x2_b64_s64_psub1
-alignment:       2
-tracksRegLiveness: true
-registers:
-  - { id: 0, class: gpr64 }
-  - { id: 1, class: gpr64 }
-  - { id: 2, class: ppr }
-  - { id: 3, class: ppr2mul2 }
-  - { id: 4, class: ppr }
-  - { id: 5, class: ppr }
-  - { id: 6, class: gpr32 }
-  - { id: 7, class: gpr32 }
-liveins:
-  - { reg: '$x0', virtual-reg: '%0' }
-  - { reg: '$x1', virtual-reg: '%1' }
-frameInfo:
-  maxCallFrameSize: 0
-body:             |
-  bb.0.entry:
-    liveins: $x0, $x1
-
-    ; CHECK-LABEL: name: whilele_x2_b64_s64_psub1
-    ; CHECK: liveins: $x0, $x1
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
-    ; CHECK-NEXT: [[PTRUE_D:%[0-9]+]]:ppr = PTRUE_D 31, implicit $vg
-    ; CHECK-NEXT: [[WHILELE_2PXX_D:%[0-9]+]]:ppr2mul2 = WHILELE_2PXX_D [[COPY]], [[COPY1]], implicit-def $nzcv
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ppr = COPY [[WHILELE_2PXX_D]].psub0
-    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ppr = COPY [[WHILELE_2PXX_D]].psub1
-    ; CHECK-NEXT: PTEST_PP killed [[PTRUE_D]], killed [[COPY3]], implicit-def $nzcv
-    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr
-    ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr [[COPY4]], $wzr, 0, implicit $nzcv
-    ; CHECK-NEXT: $w0 = COPY [[CSINCWr]]
-    ; CHECK-NEXT: RET_ReallyLR implicit $w0
-    %0:gpr64 = COPY $x0
-    %1:gpr64 = COPY $x1
-    %2:ppr = PTRUE_D 31, implicit $vg
-    %3:ppr2mul2 = WHILELE_2PXX_D %0, %1, implicit-def $nzcv
-    %4:ppr = COPY %3.psub0
-    %5:ppr = COPY %3.psub1
-    PTEST_PP killed %2, killed %5, implicit-def $nzcv
-    %6:gpr32 = COPY $wzr
-    %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
-    $w0 = COPY %7
-    RET_ReallyLR implicit $w0
-...
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilelo.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilelo.mir
index 835abf4f6bb9b..7d083f0965785 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilelo.mir
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilelo.mir
@@ -581,7 +581,7 @@ body:             |
     %3:ppr2mul2 = WHILELO_2PXX_D %0, %1, implicit-def $nzcv
     %4:ppr = COPY %3.psub0
     %5:ppr = COPY %3.psub1
-    PTEST_PP killed %2, killed %4, implicit-def $nzcv
+    PTEST_PP_FIRST killed %2, killed %4, implicit-def $nzcv
     %6:gpr32 = COPY $wzr
     %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
     $w0 = COPY %7
@@ -621,7 +621,7 @@ body:             |
     ; CHECK-NEXT: [[WHILELO_2PXX_D:%[0-9]+]]:ppr2mul2 = WHILELO_2PXX_D [[COPY]], [[COPY1]], implicit-def $nzcv
     ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ppr = COPY [[WHILELO_2PXX_D]].psub0
     ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ppr = COPY [[WHILELO_2PXX_D]].psub1
-    ; CHECK-NEXT: PTEST_PP killed [[PTRUE_D]], killed [[COPY3]], implicit-def $nzcv
+    ; CHECK-NEXT: PTEST_PP_FIRST killed [[PTRUE_D]], killed [[COPY3]], implicit-def $nzcv
     ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr
     ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr [[COPY4]], $wzr, 0, implicit $nzcv
     ; CHECK-NEXT: $w0 = COPY [[CSINCWr]]
@@ -632,7 +632,58 @@ body:             |
     %3:ppr2mul2 = WHILELO_2PXX_D %0, %1, implicit-def $nzcv
     %4:ppr = COPY %3.psub0
     %5:ppr = COPY %3.psub1
-    PTEST_PP killed %2, killed %5, implicit-def $nzcv
+    PTEST_PP_FIRST killed %2, killed %5, implicit-def $nzcv
+    %6:gpr32 = COPY $wzr
+    %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
+    $w0 = COPY %7
+    RET_ReallyLR implicit $w0
+...
+
+# PTEST is not redundant when its Pn operand is a copy from subregister 0
+# but the condition is not FIRST_ACTIVE (i.e. the opcode is not PTEST_PP_FIRST)
+---
+name:            whilelo_x2_b64_s64_not_first
+alignment:       2
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: gpr64 }
+  - { id: 1, class: gpr64 }
+  - { id: 2, class: ppr }
+  - { id: 3, class: ppr2mul2 }
+  - { id: 4, class: ppr }
+  - { id: 5, class: ppr }
+  - { id: 6, class: gpr32 }
+  - { id: 7, class: gpr32 }
+liveins:
+  - { reg: '$x0', virtual-reg: '%0' }
+  - { reg: '$x1', virtual-reg: '%1' }
+frameInfo:
+  maxCallFrameSize: 0
+body:             |
+  bb.0.entry:
+    liveins: $x0, $x1
+
+    ; CHECK-LABEL: name: whilelo_x2_b64_s64_not_first
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+    ; CHECK-NEXT: [[PTRUE_D:%[0-9]+]]:ppr = PTRUE_D 31, implicit $vg
+    ; CHECK-NEXT: [[WHILELO_2PXX_D:%[0-9]+]]:ppr2mul2 = WHILELO_2PXX_D [[COPY]], [[COPY1]], implicit-def $nzcv
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ppr = COPY [[WHILELO_2PXX_D]].psub0
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ppr = COPY [[WHILELO_2PXX_D]].psub1
+    ; CHECK-NEXT: PTEST_PP killed [[PTRUE_D]], killed [[COPY2]], implicit-def $nzcv
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr
+    ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr [[COPY4]], $wzr, 0, implicit $nzcv
+    ; CHECK-NEXT: $w0 = COPY [[CSINCWr]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %0:gpr64 = COPY $x0
+    %1:gpr64 = COPY $x1
+    %2:ppr = PTRUE_D 31, implicit $vg
+    %3:ppr2mul2 = WHILELO_2PXX_D %0, %1, implicit-def $nzcv
+    %4:ppr = COPY %3.psub0
+    %5:ppr = COPY %3.psub1
+    PTEST_PP killed %2, killed %4, implicit-def $nzcv
     %6:gpr32 = COPY $wzr
     %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
     $w0 = COPY %7
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilels.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilels.mir
index 8a4764e629132..9b378a83e917e 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilels.mir
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilels.mir
@@ -538,103 +538,3 @@ body:             |
     RET_ReallyLR implicit $w0
 
 ...
-
-# WHILELS (predicate pair)
----
-name:            whilels_x2_b64_s64
-alignment:       2
-tracksRegLiveness: true
-registers:
-  - { id: 0, class: gpr64 }
-  - { id: 1, class: gpr64 }
-  - { id: 2, class: ppr }
-  - { id: 3, class: ppr2mul2 }
-  - { id: 4, class: ppr }
-  - { id: 5, class: ppr }
-  - { id: 6, class: gpr32 }
-  - { id: 7, class: gpr32 }
-liveins:
-  - { reg: '$x0', virtual-reg: '%0' }
-  - { reg: '$x1', virtual-reg: '%1' }
-frameInfo:
-  maxCallFrameSize: 0
-body:             |
-  bb.0.entry:
-    liveins: $x0, $x1
-
-    ; CHECK-LABEL: name: whilels_x2_b64_s64
-    ; CHECK: liveins: $x0, $x1
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
-    ; CHECK-NEXT: [[PTRUE_D:%[0-9]+]]:ppr = PTRUE_D 31, implicit $vg
-    ; CHECK-NEXT: [[WHILELS_2PXX_D:%[0-9]+]]:ppr2mul2 = WHILELS_2PXX_D [[COPY]], [[COPY1]], implicit-def $nzcv
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ppr = COPY [[WHILELS_2PXX_D]].psub0
-    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ppr = COPY [[WHILELS_2PXX_D]].psub1
-    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr
-    ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr [[COPY4]], $wzr, 0, implicit $nzcv
-    ; CHECK-NEXT: $w0 = COPY [[CSINCWr]]
-    ; CHECK-NEXT: RET_ReallyLR implicit $w0
-    %0:gpr64 = COPY $x0
-    %1:gpr64 = COPY $x1
-    %2:ppr = PTRUE_D 31, implicit $vg
-    %3:ppr2mul2 = WHILELS_2PXX_D %0, %1, implicit-def $nzcv
-    %4:ppr = COPY %3.psub0
-    %5:ppr = COPY %3.psub1
-    PTEST_PP killed %2, killed %4, implicit-def $nzcv
-    %6:gpr32 = COPY $wzr
-    %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
-    $w0 = COPY %7
-    RET_ReallyLR implicit $w0
-...
-
-# PTEST is not redundant when it's Pg operand is a subregister copy, but not
-# from the first subregister of ppr2mul2
----
-name:            whilels_x2_b64_s64_psub1
-alignment:       2
-tracksRegLiveness: true
-registers:
-  - { id: 0, class: gpr64 }
-  - { id: 1, class: gpr64 }
-  - { id: 2, class: ppr }
-  - { id: 3, class: ppr2mul2 }
-  - { id: 4, class: ppr }
-  - { id: 5, class: ppr }
-  - { id: 6, class: gpr32 }
-  - { id: 7, class: gpr32 }
-liveins:
-  - { reg: '$x0', virtual-reg: '%0' }
-  - { reg: '$x1', virtual-reg: '%1' }
-frameInfo:
-  maxCallFrameSize: 0
-body:             |
-  bb.0.entry:
-    liveins: $x0, $x1
-
-    ; CHECK-LABEL: name: whilels_x2_b64_s64_psub1
-    ; CHECK: liveins: $x0, $x1
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
-    ; CHECK-NEXT: [[PTRUE_D:%[0-9]+]]:ppr = PTRUE_D 31, implicit $vg
-    ; CHECK-NEXT: [[WHILELS_2PXX_D:%[0-9]+]]:ppr2mul2 = WHILELS_2PXX_D [[COPY]], [[COPY1]], implicit-def $nzcv
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ppr = COPY [[WHILELS_2PXX_D]].psub0
-    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ppr = COPY [[WHILELS_2PXX_D]].psub1
-    ; CHECK-NEXT: PTEST_PP killed [[PTRUE_D]], killed [[COPY3]], implicit-def $nzcv
-    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr
-    ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr [[COPY4]], $wzr, 0, implicit $nzcv
-    ; CHECK-NEXT: $w0 = COPY [[CSINCWr]]
-    ; CHECK-NEXT: RET_ReallyLR implicit $w0
-    %0:gpr64 = COPY $x0
-    %1:gpr64 = COPY $x1
-    %2:ppr = PTRUE_D 31, implicit $vg
-    %3:ppr2mul2 = WHILELS_2PXX_D %0, %1, implicit-def $nzcv
-    %4:ppr = COPY %3.psub0
-    %5:ppr = COPY %3.psub1
-    PTEST_PP killed %2, killed %5, implicit-def $nzcv
-    %6:gpr32 = COPY $wzr
-    %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
-    $w0 = COPY %7
-    RET_ReallyLR implicit $w0
-...
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilelt.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilelt.mir
index b66c2599d71aa..ef88a8dd848b0 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilelt.mir
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-whilelt.mir
@@ -538,103 +538,3 @@ body:             |
     RET_ReallyLR implicit $w0
 
 ...
-
-# WHILELT (predicate pair)
----
-name:            whilelt_x2_b64_s64
-alignment:       2
-tracksRegLiveness: true
-registers:
-  - { id: 0, class: gpr64 }
-  - { id: 1, class: gpr64 }
-  - { id: 2, class: ppr }
-  - { id: 3, class: ppr2mul2 }
-  - { id: 4, class: ppr }
-  - { id: 5, class: ppr }
-  - { id: 6, class: gpr32 }
-  - { id: 7, class: gpr32 }
-liveins:
-  - { reg: '$x0', virtual-reg: '%0' }
-  - { reg: '$x1', virtual-reg: '%1' }
-frameInfo:
-  maxCallFrameSize: 0
-body:             |
-  bb.0.entry:
-    liveins: $x0, $x1
-
-    ; CHECK-LABEL: name: whilelt_x2_b64_s64
-    ; CHECK: liveins: $x0, $x1
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
-    ; CHECK-NEXT: [[PTRUE_D:%[0-9]+]]:ppr = PTRUE_D 31, implicit $vg
-    ; CHECK-NEXT: [[WHILELT_2PXX_D:%[0-9]+]]:ppr2mul2 = WHILELT_2PXX_D [[COPY]], [[COPY1]], implicit-def $nzcv
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ppr = COPY [[WHILELT_2PXX_D]].psub0
-    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ppr = COPY [[WHILELT_2PXX_D]].psub1
-    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr
-    ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr [[COPY4]], $wzr, 0, implicit $nzcv
-    ; CHECK-NEXT: $w0 = COPY [[CSINCWr]]
-    ; CHECK-NEXT: RET_ReallyLR implicit $w0
-    %0:gpr64 = COPY $x0
-    %1:gpr64 = COPY $x1
-    %2:ppr = PTRUE_D 31, implicit $vg
-    %3:ppr2mul2 = WHILELT_2PXX_D %0, %1, implicit-def $nzcv
-    %4:ppr = COPY %3.psub0
-    %5:ppr = COPY %3.psub1
-    PTEST_PP killed %2, killed %4, implicit-def $nzcv
-    %6:gpr32 = COPY $wzr
-    %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
-    $w0 = COPY %7
-    RET_ReallyLR implicit $w0
-...
-
-# PTEST is not redundant when it's Pg operand is a subregister copy, but not
-# from the first subregister of ppr2mul2
----
-name:            whilelt_x2_b64_s64_psub1
-alignment:       2
-tracksRegLiveness: true
-registers:
-  - { id: 0, class: gpr64 }
-  - { id: 1, class: gpr64 }
-  - { id: 2, class: ppr }
-  - { id: 3, class: ppr2mul2 }
-  - { id: 4, class: ppr }
-  - { id: 5, class: ppr }
-  - { id: 6, class: gpr32 }
-  - { id: 7, class: gpr32 }
-liveins:
-  - { reg: '$x0', virtual-reg: '%0' }
-  - { reg: '$x1', virtual-reg: '%1' }
-frameInfo:
-  maxCallFrameSize: 0
-body:             |
-  bb.0.entry:
-    liveins: $x0, $x1
-
-    ; CHECK-LABEL: name: whilelt_x2_b64_s64_psub1
-    ; CHECK: liveins: $x0, $x1
-    ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
-    ; CHECK-NEXT: [[PTRUE_D:%[0-9]+]]:ppr = PTRUE_D 31, implicit $vg
-    ; CHECK-NEXT: [[WHILELT_2PXX_D:%[0-9]+]]:ppr2mul2 = WHILELT_2PXX_D [[COPY]], [[COPY1]], implicit-def $nzcv
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ppr = COPY [[WHILELT_2PXX_D]].psub0
-    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ppr = COPY [[WHILELT_2PXX_D]].psub1
-    ; CHECK-NEXT: PTEST_PP killed [[PTRUE_D]], killed [[COPY3]], implicit-def $nzcv
-    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr
-    ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr [[COPY4]], $wzr, 0, implicit $nzcv
-    ; CHECK-NEXT: $w0 = COPY [[CSINCWr]]
-    ; CHECK-NEXT: RET_ReallyLR implicit $w0
-    %0:gpr64 = COPY $x0
-    %1:gpr64 = COPY $x1
-    %2:ppr = PTRUE_D 31, implicit $vg
-    %3:ppr2mul2 = WHILELT_2PXX_D %0, %1, implicit-def $nzcv
-    %4:ppr = COPY %3.psub0
-    %5:ppr = COPY %3.psub1
-    PTEST_PP killed %2, killed %5, implicit-def $nzcv
-    %6:gpr32 = COPY $wzr
-    %7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
-    $w0 = COPY %7
-    RET_ReallyLR implicit $w0
-...



More information about the llvm-commits mailing list