[llvm] [AArch64] Peephole optimization to remove redundant csel instructions (PR #101483)

via llvm-commits llvm-commits at lists.llvm.org
Thu Aug 1 06:22:42 PDT 2024


llvmbot wrote:



@llvm/pr-subscribers-backend-aarch64

Author: Marina (citymarina)

<details>
<summary>Changes</summary>

Given a sequence such as

  %8:gpr64 = COPY $xzr
  %10:gpr64 = COPY $xzr
  %11:gpr64 = CSELXr %8:gpr64, %10:gpr64, 0, implicit $nzcv

`PeepholeOptimizer::foldRedundantCopy` led to select instructions in which both inputs are the same register:

  %11:gpr64 = CSELXr %8:gpr64, %8:gpr64, 0, implicit $nzcv

This change adds a later peephole optimization that replaces such selects with unconditional moves.
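On AArch64 an unconditional register move is simply an `ORR` from the zero register, so the new peephole rewrites the degenerate select into (a sketch of the resulting MIR, mirroring the test updates below):

  %11:gpr64 = ORRXrs $xzr, %8:gpr64, 0

In textual assembly this prints through the `mov` alias, which is why the `.ll` test below now shows `mov x9, xzr` in place of the `csel`.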

---
Full diff: https://github.com/llvm/llvm-project/pull/101483.diff


3 Files Affected:

- (modified) llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp (+28) 
- (modified) llvm/test/CodeGen/AArch64/peephole-csel.ll (+1-1) 
- (modified) llvm/test/CodeGen/AArch64/peephole-csel.mir (+2-2) 


``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
index f61de8ff1a4a6..5c5a9df82d7b5 100644
--- a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
+++ b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
@@ -61,6 +61,9 @@
 //   %6:fpr128 = IMPLICIT_DEF
 //   %7:fpr128 = INSERT_SUBREG %6:fpr128(tied-def 0), killed %1:fpr64, %subreg.dsub
 //
+// 8. Remove redundant CSELs that select between identical registers, by
+//    replacing them with unconditional moves.
+//
 //===----------------------------------------------------------------------===//
 
 #include "AArch64ExpandImm.h"
@@ -124,6 +127,7 @@ struct AArch64MIPeepholeOpt : public MachineFunctionPass {
   template <typename T>
   bool visitAND(unsigned Opc, MachineInstr &MI);
   bool visitORR(MachineInstr &MI);
+  bool visitCSEL(MachineInstr &MI);
   bool visitINSERT(MachineInstr &MI);
   bool visitINSviGPR(MachineInstr &MI, unsigned Opc);
   bool visitINSvi64lane(MachineInstr &MI);
@@ -283,6 +287,26 @@ bool AArch64MIPeepholeOpt::visitORR(MachineInstr &MI) {
   return true;
 }
 
+bool AArch64MIPeepholeOpt::visitCSEL(MachineInstr &MI) {
+  // Replace CSEL with MOV when both inputs are the same register.
+  if (MI.getOperand(1).getReg() != MI.getOperand(2).getReg())
+    return false;
+
+  auto ZeroReg =
+      MI.getOpcode() == AArch64::CSELXr ? AArch64::XZR : AArch64::WZR;
+  auto OrOpcode =
+      MI.getOpcode() == AArch64::CSELXr ? AArch64::ORRXrs : AArch64::ORRWrs;
+
+  BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(OrOpcode))
+      .addReg(MI.getOperand(0).getReg(), RegState::Define)
+      .addReg(ZeroReg)
+      .addReg(MI.getOperand(1).getReg())
+      .addImm(0);
+
+  MI.eraseFromParent();
+  return true;
+}
+
 bool AArch64MIPeepholeOpt::visitINSERT(MachineInstr &MI) {
   // Check this INSERT_SUBREG comes from below zero-extend pattern.
   //
@@ -788,6 +812,10 @@ bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
             visitADDSSUBS<uint64_t>({AArch64::SUBXri, AArch64::SUBSXri},
                                     {AArch64::ADDXri, AArch64::ADDSXri}, MI);
         break;
+      case AArch64::CSELWr:
+      case AArch64::CSELXr:
+        Changed |= visitCSEL(MI);
+        break;
       case AArch64::INSvi64gpr:
         Changed |= visitINSviGPR(MI, AArch64::INSvi64lane);
         break;
diff --git a/llvm/test/CodeGen/AArch64/peephole-csel.ll b/llvm/test/CodeGen/AArch64/peephole-csel.ll
index 3f92943b11eb1..868b9f1f2f6ac 100644
--- a/llvm/test/CodeGen/AArch64/peephole-csel.ll
+++ b/llvm/test/CodeGen/AArch64/peephole-csel.ll
@@ -6,7 +6,7 @@ define void @peephole_csel(ptr %dst, i1 %0, i1 %cmp) {
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    tst w2, #0x1
 ; CHECK-NEXT:    mov w8, #1 // =0x1
-; CHECK-NEXT:    csel x9, xzr, xzr, eq
+; CHECK-NEXT:    mov x9, xzr
 ; CHECK-NEXT:    tst w1, #0x1
 ; CHECK-NEXT:    csel x8, x8, x9, eq
 ; CHECK-NEXT:    str x8, [x0]
diff --git a/llvm/test/CodeGen/AArch64/peephole-csel.mir b/llvm/test/CodeGen/AArch64/peephole-csel.mir
index 5077441a33788..d424dc05c801c 100644
--- a/llvm/test/CodeGen/AArch64/peephole-csel.mir
+++ b/llvm/test/CodeGen/AArch64/peephole-csel.mir
@@ -19,7 +19,7 @@ body:             |
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x1
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x0
     ; CHECK-NEXT: $xzr = ANDSXri [[COPY]], 0, implicit-def $nzcv
-    ; CHECK-NEXT: [[CSELXr:%[0-9]+]]:gpr64 = CSELXr [[COPY1]], [[COPY1]], 0, implicit $nzcv
+    ; CHECK-NEXT: [[ORRXrs:%[0-9]+]]:gpr64 = ORRXrs $xzr, [[COPY1]], 0
     ; CHECK-NEXT: RET_ReallyLR
     %3:gpr64 = COPY $x1
     %4:gpr64 = COPY $x0
@@ -46,7 +46,7 @@ body:             |
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w1
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w0
     ; CHECK-NEXT: $wzr = ANDSWri [[COPY]], 0, implicit-def $nzcv
-    ; CHECK-NEXT: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[COPY1]], [[COPY1]], 0, implicit $nzcv
+    ; CHECK-NEXT: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, [[COPY1]], 0
     ; CHECK-NEXT: RET_ReallyLR
     %3:gpr32 = COPY $w1
     %4:gpr32 = COPY $w0

``````````

</details>


https://github.com/llvm/llvm-project/pull/101483

