[llvm] [AArch64] Peephole optimization to remove redundant csel instructions (PR #101483)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 1 06:22:42 PDT 2024
llvmbot wrote:
@llvm/pr-subscribers-backend-aarch64
Author: Marina (citymarina)
Given a sequence such as

    %8:gpr64 = COPY $xzr
    %10:gpr64 = COPY $xzr
    %11:gpr64 = CSELXr %8:gpr64, %10:gpr64, 0, implicit $nzcv

`PeepholeOptimizer::foldRedundantCopy` led to the creation of select instructions where both inputs were the same register:

    %11:gpr64 = CSELXr %8:gpr64, %8:gpr64, 0, implicit $nzcv
This change adds a later peephole optimization, in `AArch64MIPeepholeOpt`, that replaces such selects with unconditional moves.
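At the assembly level the rewrite looks like the sketch below (the register names and condition code are illustrative, not taken from the patch). It also shows why the pass builds an `ORRXrs`/`ORRWrs`: the AArch64 register-to-register `mov` is an alias for `orr` with the zero register.

```
// Before: the select's two inputs are identical, so the result does
// not depend on the condition flags.
csel x9, x8, x8, eq
// After: an unconditional move; "mov x9, x8" is the assembler alias
// for "orr x9, xzr, x8", i.e. the ORRXrs the pass emits.
mov x9, x8
```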
---
Full diff: https://github.com/llvm/llvm-project/pull/101483.diff
3 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp (+28)
- (modified) llvm/test/CodeGen/AArch64/peephole-csel.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/peephole-csel.mir (+2-2)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
index f61de8ff1a4a6..5c5a9df82d7b5 100644
--- a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
+++ b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
@@ -61,6 +61,9 @@
// %6:fpr128 = IMPLICIT_DEF
// %7:fpr128 = INSERT_SUBREG %6:fpr128(tied-def 0), killed %1:fpr64, %subreg.dsub
//
+// 8. Remove redundant CSELs that select between identical registers, by
+// replacing them with unconditional moves.
+//
//===----------------------------------------------------------------------===//
#include "AArch64ExpandImm.h"
@@ -124,6 +127,7 @@ struct AArch64MIPeepholeOpt : public MachineFunctionPass {
template <typename T>
bool visitAND(unsigned Opc, MachineInstr &MI);
bool visitORR(MachineInstr &MI);
+ bool visitCSEL(MachineInstr &MI);
bool visitINSERT(MachineInstr &MI);
bool visitINSviGPR(MachineInstr &MI, unsigned Opc);
bool visitINSvi64lane(MachineInstr &MI);
@@ -283,6 +287,26 @@ bool AArch64MIPeepholeOpt::visitORR(MachineInstr &MI) {
return true;
}
+bool AArch64MIPeepholeOpt::visitCSEL(MachineInstr &MI) {
+ // Replace CSEL with MOV when both inputs are the same register.
+ if (MI.getOperand(1).getReg() != MI.getOperand(2).getReg())
+ return false;
+
+ auto ZeroReg =
+ MI.getOpcode() == AArch64::CSELXr ? AArch64::XZR : AArch64::WZR;
+ auto OrOpcode =
+ MI.getOpcode() == AArch64::CSELXr ? AArch64::ORRXrs : AArch64::ORRWrs;
+
+ BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(OrOpcode))
+ .addReg(MI.getOperand(0).getReg(), RegState::Define)
+ .addReg(ZeroReg)
+ .addReg(MI.getOperand(1).getReg())
+ .addImm(0);
+
+ MI.eraseFromParent();
+ return true;
+}
+
bool AArch64MIPeepholeOpt::visitINSERT(MachineInstr &MI) {
// Check this INSERT_SUBREG comes from below zero-extend pattern.
//
@@ -788,6 +812,10 @@ bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
visitADDSSUBS<uint64_t>({AArch64::SUBXri, AArch64::SUBSXri},
{AArch64::ADDXri, AArch64::ADDSXri}, MI);
break;
+ case AArch64::CSELWr:
+ case AArch64::CSELXr:
+ Changed |= visitCSEL(MI);
+ break;
case AArch64::INSvi64gpr:
Changed |= visitINSviGPR(MI, AArch64::INSvi64lane);
break;
diff --git a/llvm/test/CodeGen/AArch64/peephole-csel.ll b/llvm/test/CodeGen/AArch64/peephole-csel.ll
index 3f92943b11eb1..868b9f1f2f6ac 100644
--- a/llvm/test/CodeGen/AArch64/peephole-csel.ll
+++ b/llvm/test/CodeGen/AArch64/peephole-csel.ll
@@ -6,7 +6,7 @@ define void @peephole_csel(ptr %dst, i1 %0, i1 %cmp) {
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: tst w2, #0x1
; CHECK-NEXT: mov w8, #1 // =0x1
-; CHECK-NEXT: csel x9, xzr, xzr, eq
+; CHECK-NEXT: mov x9, xzr
; CHECK-NEXT: tst w1, #0x1
; CHECK-NEXT: csel x8, x8, x9, eq
; CHECK-NEXT: str x8, [x0]
diff --git a/llvm/test/CodeGen/AArch64/peephole-csel.mir b/llvm/test/CodeGen/AArch64/peephole-csel.mir
index 5077441a33788..d424dc05c801c 100644
--- a/llvm/test/CodeGen/AArch64/peephole-csel.mir
+++ b/llvm/test/CodeGen/AArch64/peephole-csel.mir
@@ -19,7 +19,7 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x1
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x0
; CHECK-NEXT: $xzr = ANDSXri [[COPY]], 0, implicit-def $nzcv
- ; CHECK-NEXT: [[CSELXr:%[0-9]+]]:gpr64 = CSELXr [[COPY1]], [[COPY1]], 0, implicit $nzcv
+ ; CHECK-NEXT: [[ORRXrs:%[0-9]+]]:gpr64 = ORRXrs $xzr, [[COPY1]], 0
; CHECK-NEXT: RET_ReallyLR
%3:gpr64 = COPY $x1
%4:gpr64 = COPY $x0
@@ -46,7 +46,7 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w1
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w0
; CHECK-NEXT: $wzr = ANDSWri [[COPY]], 0, implicit-def $nzcv
- ; CHECK-NEXT: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[COPY1]], [[COPY1]], 0, implicit $nzcv
+ ; CHECK-NEXT: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, [[COPY1]], 0
; CHECK-NEXT: RET_ReallyLR
%3:gpr32 = COPY $w1
%4:gpr32 = COPY $w0
``````````
https://github.com/llvm/llvm-project/pull/101483