[llvm] [LLVM][CodeGen][AArch64] Improve PTEST removal by looking through copies. (PR #132041)

Paul Walker via llvm-commits llvm-commits at lists.llvm.org
Wed Mar 19 07:43:00 PDT 2025


https://github.com/paulwalker-arm created https://github.com/llvm/llvm-project/pull/132041

The general predicates of the PTEST and PTEST_like instructions may belong to different register classes. This can lead to the insertion of a COPY instruction between them, making the two predicates appear different even though they hold the same value. However, for the purpose of PTEST removal, such copies are irrelevant, and we can look through them to improve the likelihood of finding a match.

>From 715d144092219eebb60fd64727b9db6c0b5ac5e7 Mon Sep 17 00:00:00 2001
From: Paul Walker <paul.walker at arm.com>
Date: Wed, 19 Mar 2025 14:10:16 +0000
Subject: [PATCH 1/2] Add test.

---
 .../AArch64/sve-ptest-removal-cmpeq.mir       | 46 +++++++++++++++++++
 1 file changed, 46 insertions(+)

diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.mir
index bd1ff4c2c9726..d81bc0d0f07f8 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.mir
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.mir
@@ -661,3 +661,49 @@ body:             |
     RET_ReallyLR implicit $w0
 
 ...
+---
+name:            cmpeq_nxv16i8_ptest_with_register_class_mismatch
+alignment:       2
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: ppr }
+  - { id: 1, class: zpr }
+  - { id: 2, class: zpr }
+  - { id: 3, class: ppr_3b }
+  - { id: 4, class: ppr }
+  - { id: 5, class: gpr32 }
+  - { id: 6, class: gpr32 }
+liveins:
+  - { reg: '$z0', virtual-reg: '%1' }
+  - { reg: '$z1', virtual-reg: '%2' }
+frameInfo:
+  maxCallFrameSize: 0
+body:             |
+  bb.0:
+    liveins: $z0, $z1
+
+    ; CHECK-LABEL: name: cmpeq_nxv16i8_ptest_with_register_class_mismatch
+    ; CHECK: liveins: $z0, $z1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:zpr = COPY $z0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:zpr = COPY $z1
+    ; CHECK-NEXT: [[PTRUE_B:%[0-9]+]]:ppr = PTRUE_B 31, implicit $vg
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ppr_3b = COPY [[PTRUE_B]]
+    ; CHECK-NEXT: [[CMPEQ_PPzZZ_B:%[0-9]+]]:ppr = CMPEQ_PPzZZ_B [[COPY2]], [[COPY]], [[COPY1]], implicit-def dead $nzcv
+    ; CHECK-NEXT: PTEST_PP [[PTRUE_B]], killed [[CMPEQ_PPzZZ_B]], implicit-def $nzcv
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr32 = COPY $wzr
+    ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr [[COPY3]], $wzr, 0, implicit $nzcv
+    ; CHECK-NEXT: $w0 = COPY [[CSINCWr]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %1:zpr = COPY $z0
+    %2:zpr = COPY $z1
+    %0:ppr = PTRUE_B 31, implicit $vg
+    %3:ppr_3b = COPY %0
+    %4:ppr = CMPEQ_PPzZZ_B %3, %1, %2, implicit-def dead $nzcv
+    PTEST_PP %0, killed %4, implicit-def $nzcv
+    %5:gpr32 = COPY $wzr
+    %6:gpr32 = CSINCWr %5, $wzr, 0, implicit $nzcv
+    $w0 = COPY %6
+    RET_ReallyLR implicit $w0
+
+...

>From 6d3a4776c800f10006e0c158a53296c9bb299bf6 Mon Sep 17 00:00:00 2001
From: Paul Walker <paul.walker at arm.com>
Date: Tue, 18 Mar 2025 14:50:34 +0000
Subject: [PATCH 2/2] [LLVM][CodeGen][AArch64] Improve PTEST removal by looking
 through copies.

The general predicates of the PTEST and PTEST_like instructions may
belong to different register classes. This can lead to the insertion
of a COPY instruction between them, making the two predicates appear
different even though they hold the same value. However, for the
purpose of PTEST removal, such copies are irrelevant, and we can look
through them to improve the likelihood of finding a match.
---
 llvm/lib/Target/AArch64/AArch64InstrInfo.cpp         | 12 ++++++++++--
 .../test/CodeGen/AArch64/sve-ptest-removal-cmpeq.mir |  3 +--
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index c91590fa43601..9f8082b64ab18 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -1491,13 +1491,22 @@ AArch64InstrInfo::canRemovePTestInstr(MachineInstr *PTest, MachineInstr *Mask,
     if ((Mask == Pred) && PTest->getOpcode() == AArch64::PTEST_PP_ANY)
       return PredOpcode;
 
+    auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
+
+    // If the PTEST like instruction's general predicate is not `Mask`, attempt
+    // to look through a copy and try again. This is because some instructions
+    // take a predicate whose register class is a subset of its result class.
+    if (Mask != PTestLikeMask && PTestLikeMask->isFullCopy() &&
+        PTestLikeMask->getOperand(1).getReg().isVirtual())
+      PTestLikeMask =
+          MRI->getUniqueVRegDef(PTestLikeMask->getOperand(1).getReg());
+
     // For PTEST(PTRUE_ALL, PTEST_LIKE), the PTEST is redundant if the
     // the element size matches and either the PTEST_LIKE instruction uses
     // the same all active mask or the condition is "any".
     if (isPTrueOpcode(MaskOpcode) && Mask->getOperand(1).getImm() == 31 &&
         getElementSizeForOpcode(MaskOpcode) ==
             getElementSizeForOpcode(PredOpcode)) {
-      auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
       if (Mask == PTestLikeMask || PTest->getOpcode() == AArch64::PTEST_PP_ANY)
         return PredOpcode;
     }
@@ -1524,7 +1533,6 @@ AArch64InstrInfo::canRemovePTestInstr(MachineInstr *PTest, MachineInstr *Mask,
     // active flag, whereas the PTEST instruction with the same mask doesn't.
     // For PTEST_ANY this doesn't apply as the flags in this case would be
     // identical regardless of element size.
-    auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
     uint64_t PredElementSize = getElementSizeForOpcode(PredOpcode);
     if (Mask == PTestLikeMask && (PredElementSize == AArch64::ElementSizeB ||
                                   PTest->getOpcode() == AArch64::PTEST_PP_ANY))
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.mir
index d81bc0d0f07f8..7ae9b60a3d331 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.mir
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.mir
@@ -689,8 +689,7 @@ body:             |
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:zpr = COPY $z1
     ; CHECK-NEXT: [[PTRUE_B:%[0-9]+]]:ppr = PTRUE_B 31, implicit $vg
     ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ppr_3b = COPY [[PTRUE_B]]
-    ; CHECK-NEXT: [[CMPEQ_PPzZZ_B:%[0-9]+]]:ppr = CMPEQ_PPzZZ_B [[COPY2]], [[COPY]], [[COPY1]], implicit-def dead $nzcv
-    ; CHECK-NEXT: PTEST_PP [[PTRUE_B]], killed [[CMPEQ_PPzZZ_B]], implicit-def $nzcv
+    ; CHECK-NEXT: [[CMPEQ_PPzZZ_B:%[0-9]+]]:ppr = CMPEQ_PPzZZ_B [[COPY2]], [[COPY]], [[COPY1]], implicit-def $nzcv
     ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr32 = COPY $wzr
     ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr [[COPY3]], $wzr, 0, implicit $nzcv
     ; CHECK-NEXT: $w0 = COPY [[CSINCWr]]



More information about the llvm-commits mailing list