[llvm] 7dc5504 - [LLVM][CodeGen][AArch64] Improve PTEST removal by looking through copies. (#132041)

via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 20 05:43:16 PDT 2025


Author: Paul Walker
Date: 2025-03-20T12:43:11Z
New Revision: 7dc550473c5239399707804c9466519d165aa7fd

URL: https://github.com/llvm/llvm-project/commit/7dc550473c5239399707804c9466519d165aa7fd
DIFF: https://github.com/llvm/llvm-project/commit/7dc550473c5239399707804c9466519d165aa7fd.diff

LOG: [LLVM][CodeGen][AArch64] Improve PTEST removal by looking through copies. (#132041)

The general predicates of the PTEST and PTEST_like instructions may
belong to different register classes. This can lead to the insertion of
a COPY instruction, making them appear different. However, for the
purpose of PTEST removal, such copies are irrelevant, and we can look
through them to improve the likelihood of finding a match.

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
    llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index c91590fa43601..9f8082b64ab18 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -1491,13 +1491,22 @@ AArch64InstrInfo::canRemovePTestInstr(MachineInstr *PTest, MachineInstr *Mask,
     if ((Mask == Pred) && PTest->getOpcode() == AArch64::PTEST_PP_ANY)
       return PredOpcode;
 
+    auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
+
+    // If the PTEST like instruction's general predicate is not `Mask`, attempt
+    // to look through a copy and try again. This is because some instructions
+    // take a predicate whose register class is a subset of its result class.
+    if (Mask != PTestLikeMask && PTestLikeMask->isFullCopy() &&
+        PTestLikeMask->getOperand(1).getReg().isVirtual())
+      PTestLikeMask =
+          MRI->getUniqueVRegDef(PTestLikeMask->getOperand(1).getReg());
+
     // For PTEST(PTRUE_ALL, PTEST_LIKE), the PTEST is redundant if the
     // element size matches and either the PTEST_LIKE instruction uses
     // the same all active mask or the condition is "any".
     if (isPTrueOpcode(MaskOpcode) && Mask->getOperand(1).getImm() == 31 &&
         getElementSizeForOpcode(MaskOpcode) ==
             getElementSizeForOpcode(PredOpcode)) {
-      auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
       if (Mask == PTestLikeMask || PTest->getOpcode() == AArch64::PTEST_PP_ANY)
         return PredOpcode;
     }
@@ -1524,7 +1533,6 @@ AArch64InstrInfo::canRemovePTestInstr(MachineInstr *PTest, MachineInstr *Mask,
     // active flag, whereas the PTEST instruction with the same mask doesn't.
     // For PTEST_ANY this doesn't apply as the flags in this case would be
     // identical regardless of element size.
-    auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
     uint64_t PredElementSize = getElementSizeForOpcode(PredOpcode);
     if (Mask == PTestLikeMask && (PredElementSize == AArch64::ElementSizeB ||
                                   PTest->getOpcode() == AArch64::PTEST_PP_ANY))

diff  --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.mir
index bd1ff4c2c9726..7ae9b60a3d331 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.mir
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.mir
@@ -661,3 +661,48 @@ body:             |
     RET_ReallyLR implicit $w0
 
 ...
+---
+name:            cmpeq_nxv16i8_ptest_with_register_class_mismatch
+alignment:       2
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: ppr }
+  - { id: 1, class: zpr }
+  - { id: 2, class: zpr }
+  - { id: 3, class: ppr_3b }
+  - { id: 4, class: ppr }
+  - { id: 5, class: gpr32 }
+  - { id: 6, class: gpr32 }
+liveins:
+  - { reg: '$z0', virtual-reg: '%1' }
+  - { reg: '$z1', virtual-reg: '%2' }
+frameInfo:
+  maxCallFrameSize: 0
+body:             |
+  bb.0:
+    liveins: $z0, $z1
+
+    ; CHECK-LABEL: name: cmpeq_nxv16i8_ptest_with_register_class_mismatch
+    ; CHECK: liveins: $z0, $z1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:zpr = COPY $z0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:zpr = COPY $z1
+    ; CHECK-NEXT: [[PTRUE_B:%[0-9]+]]:ppr = PTRUE_B 31, implicit $vg
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ppr_3b = COPY [[PTRUE_B]]
+    ; CHECK-NEXT: [[CMPEQ_PPzZZ_B:%[0-9]+]]:ppr = CMPEQ_PPzZZ_B [[COPY2]], [[COPY]], [[COPY1]], implicit-def $nzcv
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr32 = COPY $wzr
+    ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr [[COPY3]], $wzr, 0, implicit $nzcv
+    ; CHECK-NEXT: $w0 = COPY [[CSINCWr]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %1:zpr = COPY $z0
+    %2:zpr = COPY $z1
+    %0:ppr = PTRUE_B 31, implicit $vg
+    %3:ppr_3b = COPY %0
+    %4:ppr = CMPEQ_PPzZZ_B %3, %1, %2, implicit-def dead $nzcv
+    PTEST_PP %0, killed %4, implicit-def $nzcv
+    %5:gpr32 = COPY $wzr
+    %6:gpr32 = CSINCWr %5, $wzr, 0, implicit $nzcv
+    $w0 = COPY %6
+    RET_ReallyLR implicit $w0
+
+...


        


More information about the llvm-commits mailing list