[llvm] 7dc5504 - [LLVM][CodeGen][AArch64] Improve PTEST removal by looking through copies. (#132041)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 20 05:43:16 PDT 2025
Author: Paul Walker
Date: 2025-03-20T12:43:11Z
New Revision: 7dc550473c5239399707804c9466519d165aa7fd
URL: https://github.com/llvm/llvm-project/commit/7dc550473c5239399707804c9466519d165aa7fd
DIFF: https://github.com/llvm/llvm-project/commit/7dc550473c5239399707804c9466519d165aa7fd.diff
LOG: [LLVM][CodeGen][AArch64] Improve PTEST removal by looking through copies. (#132041)
The general predicates of the PTEST and PTEST_like instructions may
belong to different register classes. This can lead to the insertion of
a COPY instruction, making them appear different. However, for the
purpose of PTEST removal, such copies are irrelevant, and we can look
through them to improve the likelihood of finding a match.
Added:
Modified:
llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index c91590fa43601..9f8082b64ab18 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -1491,13 +1491,22 @@ AArch64InstrInfo::canRemovePTestInstr(MachineInstr *PTest, MachineInstr *Mask,
if ((Mask == Pred) && PTest->getOpcode() == AArch64::PTEST_PP_ANY)
return PredOpcode;
+ auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
+
+ // If the PTEST like instruction's general predicate is not `Mask`, attempt
+ // to look through a copy and try again. This is because some instructions
+ // take a predicate whose register class is a subset of its result class.
+ if (Mask != PTestLikeMask && PTestLikeMask->isFullCopy() &&
+ PTestLikeMask->getOperand(1).getReg().isVirtual())
+ PTestLikeMask =
+ MRI->getUniqueVRegDef(PTestLikeMask->getOperand(1).getReg());
+
// For PTEST(PTRUE_ALL, PTEST_LIKE), the PTEST is redundant if the
// element size matches and either the PTEST_LIKE instruction uses
// the same all active mask or the condition is "any".
if (isPTrueOpcode(MaskOpcode) && Mask->getOperand(1).getImm() == 31 &&
getElementSizeForOpcode(MaskOpcode) ==
getElementSizeForOpcode(PredOpcode)) {
- auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
if (Mask == PTestLikeMask || PTest->getOpcode() == AArch64::PTEST_PP_ANY)
return PredOpcode;
}
@@ -1524,7 +1533,6 @@ AArch64InstrInfo::canRemovePTestInstr(MachineInstr *PTest, MachineInstr *Mask,
// active flag, whereas the PTEST instruction with the same mask doesn't.
// For PTEST_ANY this doesn't apply as the flags in this case would be
// identical regardless of element size.
- auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
uint64_t PredElementSize = getElementSizeForOpcode(PredOpcode);
if (Mask == PTestLikeMask && (PredElementSize == AArch64::ElementSizeB ||
PTest->getOpcode() == AArch64::PTEST_PP_ANY))
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.mir b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.mir
index bd1ff4c2c9726..7ae9b60a3d331 100644
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.mir
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-cmpeq.mir
@@ -661,3 +661,48 @@ body: |
RET_ReallyLR implicit $w0
...
+---
+name: cmpeq_nxv16i8_ptest_with_register_class_mismatch
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: ppr }
+ - { id: 1, class: zpr }
+ - { id: 2, class: zpr }
+ - { id: 3, class: ppr_3b }
+ - { id: 4, class: ppr }
+ - { id: 5, class: gpr32 }
+ - { id: 6, class: gpr32 }
+liveins:
+ - { reg: '$z0', virtual-reg: '%1' }
+ - { reg: '$z1', virtual-reg: '%2' }
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0:
+ liveins: $z0, $z1
+
+ ; CHECK-LABEL: name: cmpeq_nxv16i8_ptest_with_register_class_mismatch
+ ; CHECK: liveins: $z0, $z1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:zpr = COPY $z0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:zpr = COPY $z1
+ ; CHECK-NEXT: [[PTRUE_B:%[0-9]+]]:ppr = PTRUE_B 31, implicit $vg
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ppr_3b = COPY [[PTRUE_B]]
+ ; CHECK-NEXT: [[CMPEQ_PPzZZ_B:%[0-9]+]]:ppr = CMPEQ_PPzZZ_B [[COPY2]], [[COPY]], [[COPY1]], implicit-def $nzcv
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr32 = COPY $wzr
+ ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr [[COPY3]], $wzr, 0, implicit $nzcv
+ ; CHECK-NEXT: $w0 = COPY [[CSINCWr]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %1:zpr = COPY $z0
+ %2:zpr = COPY $z1
+ %0:ppr = PTRUE_B 31, implicit $vg
+ %3:ppr_3b = COPY %0
+ %4:ppr = CMPEQ_PPzZZ_B %3, %1, %2, implicit-def dead $nzcv
+ PTEST_PP %0, killed %4, implicit-def $nzcv
+ %5:gpr32 = COPY $wzr
+ %6:gpr32 = CSINCWr %5, $wzr, 0, implicit $nzcv
+ $w0 = COPY %6
+ RET_ReallyLR implicit $w0
+
+...
More information about the llvm-commits
mailing list