[llvm] [AArch64] Remove superfluous sxtw in peephole opt (PR #96293)
David Green via llvm-commits
llvm-commits at lists.llvm.org
Sun Jun 30 23:28:28 PDT 2024
https://github.com/davemgreen updated https://github.com/llvm/llvm-project/pull/96293
From 46bd1a682286533822ce763f9bd2d4096003ca34 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Mon, 1 Jul 2024 07:28:19 +0100
Subject: [PATCH] [AArch64] Remove superfluous sxtw in peephole opt
Across a basic block we might have an i32 extract (a sub_32 COPY) from a
value that only alters the upper bits, for example a sxtw, which leaves
the lower 32 bits unchanged. We can replace the COPY with a new version
that skips the sxtw.
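For example, in smull_ldrsw_shift (see the updated test below), the smull
reads only the low halves of its operands, so the separate extend of x1
is unnecessary:

  before:
    ldrsw x8, [x0]
    sxtw  x9, w1
    smull x0, w8, w9
  after:
    ldrsw x8, [x0]
    smull x0, w8, w1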
---
.../Target/AArch64/AArch64MIPeepholeOpt.cpp | 32 +++++++++++++
llvm/test/CodeGen/AArch64/peephole-sxtw.mir | 46 +++++++++++++++++++
.../CodeGen/AArch64/aarch64-mull-masks.ll | 12 ++---
3 files changed, 82 insertions(+), 8 deletions(-)
create mode 100644 llvm/test/CodeGen/AArch64/peephole-sxtw.mir
diff --git a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
index 22da7ddef98a2..9c7db121fa7fd 100644
--- a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
+++ b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
@@ -128,6 +128,7 @@ struct AArch64MIPeepholeOpt : public MachineFunctionPass {
bool visitINSviGPR(MachineInstr &MI, unsigned Opc);
bool visitINSvi64lane(MachineInstr &MI);
bool visitFMOVDr(MachineInstr &MI);
+ bool visitCopy(MachineInstr &MI);
bool runOnMachineFunction(MachineFunction &MF) override;
StringRef getPassName() const override {
@@ -690,6 +691,34 @@ bool AArch64MIPeepholeOpt::visitFMOVDr(MachineInstr &MI) {
return true;
}
+// Across a basic block we might have an i32 extract (a sub_32 COPY) from a
+// value that only alters the upper bits, for example a sxtw, which leaves
+// the lower 32 bits unchanged. We can replace the COPY with a new version
+// that skips the sxtw.
+bool AArch64MIPeepholeOpt::visitCopy(MachineInstr &MI) {
+ Register InputReg = MI.getOperand(1).getReg();
+ if (MI.getOperand(1).getSubReg() != AArch64::sub_32 ||
+ !MRI->hasOneNonDBGUse(InputReg))
+ return false;
+
+ MachineInstr *SrcMI = MRI->getUniqueVRegDef(InputReg);
+ MachineInstr *CopyMI = SrcMI;
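+ // Look through a chain of single-use full COPYs to find the real def.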
+ while (SrcMI && SrcMI->isFullCopy() &&
+ MRI->hasOneNonDBGUse(SrcMI->getOperand(1).getReg()))
+ SrcMI = MRI->getUniqueVRegDef(SrcMI->getOperand(1).getReg());
+
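+ // SBFMXri with immr=0 and imms=31 is sxtw; its low 32 bits are just its
+ // input's low 32 bits, so a sub_32 extract can bypass it.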
+ if (!SrcMI || SrcMI->getOpcode() != AArch64::SBFMXri ||
+ SrcMI->getOperand(2).getImm() != 0 || SrcMI->getOperand(3).getImm() != 31)
+ return false;
+
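+ // Redirect the COPY to read the sxtw's input directly, constraining the
+ // register class to keep the operand valid, then erase the now-dead
+ // copy (if any) and the sxtw itself.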
+ Register SrcReg = SrcMI->getOperand(1).getReg();
+ MRI->constrainRegClass(SrcReg, MRI->getRegClass(InputReg));
+ MI.getOperand(1).setReg(SrcReg);
+ if (CopyMI != SrcMI)
+ CopyMI->eraseFromParent();
+ SrcMI->eraseFromParent();
+ return true;
+}
+
bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
@@ -771,6 +800,9 @@ bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
case AArch64::FMOVDr:
Changed |= visitFMOVDr(MI);
break;
+ case AArch64::COPY:
+ Changed |= visitCopy(MI);
+ break;
}
}
}
diff --git a/llvm/test/CodeGen/AArch64/peephole-sxtw.mir b/llvm/test/CodeGen/AArch64/peephole-sxtw.mir
new file mode 100644
index 0000000000000..6dd91fbf6ec1d
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/peephole-sxtw.mir
@@ -0,0 +1,46 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -run-pass=aarch64-mi-peephole-opt -o - -mtriple=aarch64-unknown-linux -verify-machineinstrs %s | FileCheck %s
+
+---
+name: removeSxtw
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x0
+ ; CHECK-LABEL: name: removeSxtw
+ ; CHECK: liveins: $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32sp = COPY [[COPY]].sub_32
+ ; CHECK-NEXT: [[ADDWri:%[0-9]+]]:gpr32sp = ADDWri [[COPY1]], 1, 0
+ ; CHECK-NEXT: $w0 = COPY [[ADDWri]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %0:gpr64 = COPY $x0
+ %1:gpr64 = SBFMXri %0:gpr64, 0, 31
+ %2:gpr32sp = COPY %1.sub_32:gpr64
+ %3:gpr32sp = ADDWri %2:gpr32sp, 1, 0
+ $w0 = COPY %3:gpr32sp
+ RET_ReallyLR implicit $w0
+...
+---
+name: extraCopy
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x0
+ ; CHECK-LABEL: name: extraCopy
+ ; CHECK: liveins: $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32sp = COPY [[COPY]].sub_32
+ ; CHECK-NEXT: [[ADDWri:%[0-9]+]]:gpr32sp = ADDWri [[COPY1]], 1, 0
+ ; CHECK-NEXT: $w0 = COPY [[ADDWri]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %0:gpr64 = COPY $x0
+ %1:gpr64 = SBFMXri %0:gpr64, 0, 31
+ %2:gpr64all = COPY %1:gpr64
+ %3:gpr32sp = COPY %2.sub_32:gpr64all
+ %4:gpr32sp = ADDWri %3:gpr32sp, 1, 0
+ $w0 = COPY %4:gpr32sp
+ RET_ReallyLR implicit $w0
+...
diff --git a/llvm/test/CodeGen/AArch64/aarch64-mull-masks.ll b/llvm/test/CodeGen/AArch64/aarch64-mull-masks.ll
index e41eb7d38c370..058cbbe9ff13c 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-mull-masks.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-mull-masks.ll
@@ -281,8 +281,7 @@ define i64 @smull_ldrsw_shift(ptr %x0, i64 %x1) {
; CHECK-LABEL: smull_ldrsw_shift:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ldrsw x8, [x0]
-; CHECK-NEXT: sxtw x9, w1
-; CHECK-NEXT: smull x0, w8, w9
+; CHECK-NEXT: smull x0, w8, w1
; CHECK-NEXT: ret
entry:
%ext64 = load i32, ptr %x0
@@ -490,8 +489,7 @@ define i64 @smaddl_ldrsw_shift(ptr %x0, i64 %x1, i64 %x2) {
; CHECK-LABEL: smaddl_ldrsw_shift:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ldrsw x8, [x0]
-; CHECK-NEXT: sxtw x9, w1
-; CHECK-NEXT: smaddl x0, w8, w9, x2
+; CHECK-NEXT: smaddl x0, w8, w1, x2
; CHECK-NEXT: ret
entry:
%ext64 = load i32, ptr %x0
@@ -654,8 +652,7 @@ define i64 @smnegl_ldrsw_shift(ptr %x0, i64 %x1) {
; CHECK-LABEL: smnegl_ldrsw_shift:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ldrsw x8, [x0]
-; CHECK-NEXT: sxtw x9, w1
-; CHECK-NEXT: smnegl x0, w8, w9
+; CHECK-NEXT: smnegl x0, w8, w1
; CHECK-NEXT: ret
entry:
%ext64 = load i32, ptr %x0
@@ -818,8 +815,7 @@ define i64 @smsubl_ldrsw_shift(ptr %x0, i64 %x1, i64 %x2) {
; CHECK-LABEL: smsubl_ldrsw_shift:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ldrsw x8, [x0]
-; CHECK-NEXT: sxtw x9, w1
-; CHECK-NEXT: smsubl x0, w8, w9, x2
+; CHECK-NEXT: smsubl x0, w8, w1, x2
; CHECK-NEXT: ret
entry:
%ext64 = load i32, ptr %x0