[llvm] 600d493 - [AArch64] Remove superfluous sxtw in peephole opt (#96293)

David Green via llvm-commits llvm-commits at lists.llvm.org
Fri Jul 19 08:51:32 PDT 2024


Author: David Green
Date: 2024-07-19T16:51:28+01:00
New Revision: 600d4937521210eb8c5fd3e1107e50ec5cb246f2

URL: https://github.com/llvm/llvm-project/commit/600d4937521210eb8c5fd3e1107e50ec5cb246f2
DIFF: https://github.com/llvm/llvm-project/commit/600d4937521210eb8c5fd3e1107e50ec5cb246f2.diff

LOG: [AArch64] Remove superfluous sxtw in peephole opt (#96293)

Across a basic-block we might have an i32 extract from a value that only
operates on upper bits (for example a sxtw). We can replace the COPY
with a new version skipping the sxtw.

This is a re-commit of 7f2a5dfe35f8bbaca2819644c7aa844f938befd6, with a fix for
removing all the intermediate COPY nodes (and some extra debug logging).

Added: 
    llvm/lib/Target/AArch64/peephole-sxtw.mir

Modified: 
    llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
    llvm/test/CodeGen/AArch64/aarch64-mull-masks.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
index bd11bc4dd6e3f..f61de8ff1a4a6 100644
--- a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
+++ b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
@@ -128,6 +128,7 @@ struct AArch64MIPeepholeOpt : public MachineFunctionPass {
   bool visitINSviGPR(MachineInstr &MI, unsigned Opc);
   bool visitINSvi64lane(MachineInstr &MI);
   bool visitFMOVDr(MachineInstr &MI);
+  bool visitCopy(MachineInstr &MI);
   bool runOnMachineFunction(MachineFunction &MF) override;
 
   StringRef getPassName() const override {
@@ -690,6 +691,40 @@ bool AArch64MIPeepholeOpt::visitFMOVDr(MachineInstr &MI) {
   return true;
 }
 
+// Across a basic-block we might have an i32 extract from a value that only
+// operates on upper bits (for example a sxtw). We can replace the COPY with a
+// new version skipping the sxtw.
+bool AArch64MIPeepholeOpt::visitCopy(MachineInstr &MI) {
+  Register InputReg = MI.getOperand(1).getReg();
+  if (MI.getOperand(1).getSubReg() != AArch64::sub_32 ||
+      !MRI->hasOneNonDBGUse(InputReg))
+    return false;
+
+  MachineInstr *SrcMI = MRI->getUniqueVRegDef(InputReg);
+  SmallPtrSet<MachineInstr *, 4> DeadInstrs;
+  DeadInstrs.insert(SrcMI);
+  while (SrcMI && SrcMI->isFullCopy() &&
+         MRI->hasOneNonDBGUse(SrcMI->getOperand(1).getReg())) {
+    SrcMI = MRI->getUniqueVRegDef(SrcMI->getOperand(1).getReg());
+    DeadInstrs.insert(SrcMI);
+  }
+
+  if (!SrcMI || SrcMI->getOpcode() != AArch64::SBFMXri ||
+      SrcMI->getOperand(2).getImm() != 0 || SrcMI->getOperand(3).getImm() != 31)
+    return false;
+
+  Register SrcReg = SrcMI->getOperand(1).getReg();
+  MRI->constrainRegClass(SrcReg, MRI->getRegClass(InputReg));
+  LLVM_DEBUG(dbgs() << "Optimizing: " << MI);
+  MI.getOperand(1).setReg(SrcReg);
+  LLVM_DEBUG(dbgs() << "        to: " << MI);
+  for (auto *DeadMI : DeadInstrs) {
+    LLVM_DEBUG(dbgs() << "  Removing: " << *DeadMI);
+    DeadMI->eraseFromParent();
+  }
+  return true;
+}
+
 bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
   if (skipFunction(MF.getFunction()))
     return false;
@@ -771,6 +806,9 @@ bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
       case AArch64::FMOVDr:
         Changed |= visitFMOVDr(MI);
         break;
+      case AArch64::COPY:
+        Changed |= visitCopy(MI);
+        break;
       }
     }
   }

diff  --git a/llvm/lib/Target/AArch64/peephole-sxtw.mir b/llvm/lib/Target/AArch64/peephole-sxtw.mir
new file mode 100644
index 0000000000000..274d434bbec67
--- /dev/null
+++ b/llvm/lib/Target/AArch64/peephole-sxtw.mir
@@ -0,0 +1,97 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -run-pass=aarch64-mi-peephole-opt -o - -mtriple=aarch64-unknown-linux -verify-machineinstrs %s | FileCheck %s
+
+---
+name: removeSxtw
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $x0
+    ; CHECK-LABEL: name: removeSxtw
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32sp = COPY [[COPY]].sub_32
+    ; CHECK-NEXT: [[ADDWri:%[0-9]+]]:gpr32sp = ADDWri [[COPY1]], 1, 0
+    ; CHECK-NEXT: $w0 = COPY [[ADDWri]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %0:gpr64 = COPY $x0
+    %1:gpr64 = SBFMXri %0:gpr64, 0, 31
+    %2:gpr32sp = COPY %1.sub_32:gpr64
+    %3:gpr32sp = ADDWri %2:gpr32sp, 1, 0
+    $w0 = COPY %3:gpr32sp
+    RET_ReallyLR implicit $w0
+...
+---
+name: extraCopy
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $x0
+    ; CHECK-LABEL: name: extraCopy
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32sp = COPY [[COPY]].sub_32
+    ; CHECK-NEXT: [[ADDWri:%[0-9]+]]:gpr32sp = ADDWri [[COPY1]], 1, 0
+    ; CHECK-NEXT: $w0 = COPY [[ADDWri]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %0:gpr64 = COPY $x0
+    %1:gpr64 = SBFMXri %0:gpr64, 0, 31
+    %2:gpr64all = COPY %1:gpr64
+    %3:gpr32sp = COPY %2.sub_32:gpr64all
+    %4:gpr32sp = ADDWri %3:gpr32sp, 1, 0
+    $w0 = COPY %4:gpr32sp
+    RET_ReallyLR implicit $w0
+...
+---
+name: multipleCopies
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: multipleCopies
+  ; CHECK: bb.0.entry:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $w0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gpr32 = COPY $w0
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:gpr64all = IMPLICIT_DEF
+  ; CHECK-NEXT:   [[INSERT_SUBREG:%[0-9]+]]:gpr64 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.sub_32
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:gpr32 = COPY [[INSERT_SUBREG]].sub_32
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:gpr32 = COPY $wzr
+  ; CHECK-NEXT:   [[SUBWrr:%[0-9]+]]:gpr32 = SUBWrr [[COPY2]], [[COPY1]]
+  ; CHECK-NEXT:   [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[SUBWrr]], %subreg.sub_32
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   successors: %bb.1(0x04000000), %bb.2(0x7c000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   CBZX [[SUBREG_TO_REG]], %bb.1
+  ; CHECK-NEXT:   B %bb.2
+  bb.0.entry:
+    successors: %bb.1(0x80000000)
+    liveins: $w0
+
+    %2:gpr32 = COPY $w0
+    %4:gpr64all = IMPLICIT_DEF
+    %3:gpr64 = INSERT_SUBREG %4, %2, %subreg.sub_32
+    %5:gpr64 = SBFMXri killed %3, 0, 31
+    %0:gpr64all = COPY %5
+    %6:gpr64all = COPY %0
+    %7:gpr32 = COPY %6.sub_32
+    %8:gpr32 = COPY $wzr
+    %9:gpr32 = SUBWrr %8, %7
+    %10:gpr32 = ORRWrs $wzr, %9, 0
+    %1:gpr64 = SUBREG_TO_REG 0, %10, %subreg.sub_32
+
+  bb.1:
+    successors: %bb.2(0x80000000)
+
+  bb.2:
+    successors: %bb.1(0x04000000), %bb.2(0x7c000000)
+
+    CBZX %1, %bb.1
+    B %bb.2
+
+...

diff  --git a/llvm/test/CodeGen/AArch64/aarch64-mull-masks.ll b/llvm/test/CodeGen/AArch64/aarch64-mull-masks.ll
index 9b0701ab148dc..c57c95fd3b531 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-mull-masks.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-mull-masks.ll
@@ -281,8 +281,7 @@ define i64 @smull_ldrsw_shift(ptr %x0, i64 %x1) {
 ; CHECK-LABEL: smull_ldrsw_shift:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ldrsw x8, [x0]
-; CHECK-NEXT:    sxtw x9, w1
-; CHECK-NEXT:    smull x0, w8, w9
+; CHECK-NEXT:    smull x0, w8, w1
 ; CHECK-NEXT:    ret
 entry:
   %ext64 = load i32, ptr %x0
@@ -490,8 +489,7 @@ define i64 @smaddl_ldrsw_shift(ptr %x0, i64 %x1, i64 %x2) {
 ; CHECK-LABEL: smaddl_ldrsw_shift:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ldrsw x8, [x0]
-; CHECK-NEXT:    sxtw x9, w1
-; CHECK-NEXT:    smaddl x0, w8, w9, x2
+; CHECK-NEXT:    smaddl x0, w8, w1, x2
 ; CHECK-NEXT:    ret
 entry:
   %ext64 = load i32, ptr %x0
@@ -654,8 +652,7 @@ define i64 @smnegl_ldrsw_shift(ptr %x0, i64 %x1) {
 ; CHECK-LABEL: smnegl_ldrsw_shift:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ldrsw x8, [x0]
-; CHECK-NEXT:    sxtw x9, w1
-; CHECK-NEXT:    smnegl x0, w8, w9
+; CHECK-NEXT:    smnegl x0, w8, w1
 ; CHECK-NEXT:    ret
 entry:
   %ext64 = load i32, ptr %x0
@@ -818,8 +815,7 @@ define i64 @smsubl_ldrsw_shift(ptr %x0, i64 %x1, i64 %x2) {
 ; CHECK-LABEL: smsubl_ldrsw_shift:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ldrsw x8, [x0]
-; CHECK-NEXT:    sxtw x9, w1
-; CHECK-NEXT:    smsubl x0, w8, w9, x2
+; CHECK-NEXT:    smsubl x0, w8, w1, x2
 ; CHECK-NEXT:    ret
 entry:
   %ext64 = load i32, ptr %x0
@@ -1451,3 +1447,21 @@ define i64 @umaddl_and_and(i64 %x, i64 %y, i64 %a) {
     %add = add i64 %a, %mul
     ret i64 %add
 }
+
+; Check a case which can contain multiple copies that should all be removed.
+define i32 @f(i32 %0) {
+entry:
+  %1 = sext i32 %0 to i64
+  br label %A
+
+A:
+  %2 = trunc i64 %1 to i32
+  %a69.us = sub i32 0, %2
+  %a69.us.fr = freeze i32 %a69.us
+  %3 = zext i32 %a69.us.fr to i64
+  br label %B
+
+B:
+  %t = icmp eq i64 0, %3
+  br i1 %t, label %A, label %B
+}


        


More information about the llvm-commits mailing list