[llvm] fe946bf - [AArch64] Extend sxtw peephole to uxtw. (#104516)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 20 06:46:24 PDT 2024
Author: David Green
Date: 2024-08-20T14:46:19+01:00
New Revision: fe946bfb728481a249282fb1e14378bf444defd3
URL: https://github.com/llvm/llvm-project/commit/fe946bfb728481a249282fb1e14378bf444defd3
DIFF: https://github.com/llvm/llvm-project/commit/fe946bfb728481a249282fb1e14378bf444defd3.diff
LOG: [AArch64] Extend sxtw peephole to uxtw. (#104516)
This extends the existing sxtw peephole optimization (#96293) to uxtw,
which in LLVM is an ORRWrr that clears the top bits.
Fixes #98481
Added:
Modified:
llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
llvm/test/CodeGen/AArch64/aarch64-mull-masks.ll
llvm/test/CodeGen/AArch64/peephole-sxtw.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
index 5b1e79b9fad3e..6a18c4516418b 100644
--- a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
+++ b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
@@ -733,11 +733,42 @@ bool AArch64MIPeepholeOpt::visitCopy(MachineInstr &MI) {
DeadInstrs.insert(SrcMI);
}
- if (!SrcMI || SrcMI->getOpcode() != AArch64::SBFMXri ||
- SrcMI->getOperand(2).getImm() != 0 || SrcMI->getOperand(3).getImm() != 31)
+ if (!SrcMI)
+ return false;
+
+ // Look for SXTW(X) and return Reg.
+ auto getSXTWSrcReg = [](MachineInstr *SrcMI) -> Register {
+ if (SrcMI->getOpcode() != AArch64::SBFMXri ||
+ SrcMI->getOperand(2).getImm() != 0 ||
+ SrcMI->getOperand(3).getImm() != 31)
+ return AArch64::NoRegister;
+ return SrcMI->getOperand(1).getReg();
+ };
+ // Look for SUBREG_TO_REG(ORRWrr(WZR, COPY(X.sub_32)))
+ auto getUXTWSrcReg = [&](MachineInstr *SrcMI) -> Register {
+ if (SrcMI->getOpcode() != AArch64::SUBREG_TO_REG ||
+ SrcMI->getOperand(3).getImm() != AArch64::sub_32 ||
+ !MRI->hasOneNonDBGUse(SrcMI->getOperand(2).getReg()))
+ return AArch64::NoRegister;
+ MachineInstr *Orr = MRI->getUniqueVRegDef(SrcMI->getOperand(2).getReg());
+ if (!Orr || Orr->getOpcode() != AArch64::ORRWrr ||
+ Orr->getOperand(1).getReg() != AArch64::WZR ||
+ !MRI->hasOneNonDBGUse(Orr->getOperand(2).getReg()))
+ return AArch64::NoRegister;
+ MachineInstr *Cpy = MRI->getUniqueVRegDef(Orr->getOperand(2).getReg());
+ if (!Cpy || Cpy->getOpcode() != AArch64::COPY ||
+ Cpy->getOperand(1).getSubReg() != AArch64::sub_32)
+ return AArch64::NoRegister;
+ DeadInstrs.insert(Orr);
+ return Cpy->getOperand(1).getReg();
+ };
+
+ Register SrcReg = getSXTWSrcReg(SrcMI);
+ if (!SrcReg)
+ SrcReg = getUXTWSrcReg(SrcMI);
+ if (!SrcReg)
return false;
- Register SrcReg = SrcMI->getOperand(1).getReg();
MRI->constrainRegClass(SrcReg, MRI->getRegClass(InputReg));
LLVM_DEBUG(dbgs() << "Optimizing: " << MI);
MI.getOperand(1).setReg(SrcReg);
diff --git a/llvm/test/CodeGen/AArch64/aarch64-mull-masks.ll b/llvm/test/CodeGen/AArch64/aarch64-mull-masks.ll
index c57c95fd3b531..cf913db187bc7 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-mull-masks.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-mull-masks.ll
@@ -997,8 +997,7 @@ define i64 @umull_ldr2_d(ptr %x0, i64 %x1) {
; CHECK-LABEL: umull_ldr2_d:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ldr w8, [x0]
-; CHECK-NEXT: mov w9, w1
-; CHECK-NEXT: umull x0, w8, w9
+; CHECK-NEXT: umull x0, w8, w1
; CHECK-NEXT: ret
entry:
%ext64 = load i64, ptr %x0
@@ -1110,8 +1109,7 @@ define i64 @umaddl_ldr2_d(ptr %x0, i64 %x1, i64 %x2) {
; CHECK-LABEL: umaddl_ldr2_d:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ldr w8, [x0]
-; CHECK-NEXT: mov w9, w1
-; CHECK-NEXT: umaddl x0, w8, w9, x2
+; CHECK-NEXT: umaddl x0, w8, w1, x2
; CHECK-NEXT: ret
entry:
%ext64 = load i64, ptr %x0
@@ -1224,8 +1222,7 @@ define i64 @umnegl_ldr2_d(ptr %x0, i64 %x1) {
; CHECK-LABEL: umnegl_ldr2_d:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ldr w8, [x0]
-; CHECK-NEXT: mov w9, w1
-; CHECK-NEXT: umnegl x0, w8, w9
+; CHECK-NEXT: umnegl x0, w8, w1
; CHECK-NEXT: ret
entry:
%ext64 = load i64, ptr %x0
@@ -1338,8 +1335,7 @@ define i64 @umsubl_ldr2_d(ptr %x0, i64 %x1, i64 %x2) {
; CHECK-LABEL: umsubl_ldr2_d:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ldr w8, [x0]
-; CHECK-NEXT: mov w9, w1
-; CHECK-NEXT: umsubl x0, w8, w9, x2
+; CHECK-NEXT: umsubl x0, w8, w1, x2
; CHECK-NEXT: ret
entry:
%ext64 = load i64, ptr %x0
@@ -1400,8 +1396,7 @@ define i64 @umull_and_lshr(i64 %x) {
; CHECK-LABEL: umull_and_lshr:
; CHECK: // %bb.0:
; CHECK-NEXT: lsr x8, x0, #32
-; CHECK-NEXT: mov w9, w0
-; CHECK-NEXT: umull x0, w9, w8
+; CHECK-NEXT: umull x0, w0, w8
; CHECK-NEXT: ret
%lo = and i64 %x, u0xffffffff
%hi = lshr i64 %x, 32
@@ -1424,8 +1419,7 @@ define i64 @umaddl_and_lshr(i64 %x, i64 %a) {
; CHECK-LABEL: umaddl_and_lshr:
; CHECK: // %bb.0:
; CHECK-NEXT: lsr x8, x0, #32
-; CHECK-NEXT: mov w9, w0
-; CHECK-NEXT: umaddl x0, w9, w8, x1
+; CHECK-NEXT: umaddl x0, w0, w8, x1
; CHECK-NEXT: ret
%lo = and i64 %x, u0xffffffff
%hi = lshr i64 %x, 32
@@ -1437,9 +1431,7 @@ define i64 @umaddl_and_lshr(i64 %x, i64 %a) {
define i64 @umaddl_and_and(i64 %x, i64 %y, i64 %a) {
; CHECK-LABEL: umaddl_and_and:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, w0
-; CHECK-NEXT: mov w9, w1
-; CHECK-NEXT: umaddl x0, w8, w9, x2
+; CHECK-NEXT: umaddl x0, w0, w1, x2
; CHECK-NEXT: ret
%lo = and i64 %x, u0xffffffff
%hi = and i64 %y, u0xffffffff
diff --git a/llvm/test/CodeGen/AArch64/peephole-sxtw.mir b/llvm/test/CodeGen/AArch64/peephole-sxtw.mir
index 22eec1a4dc038..b57b2b71dc707 100644
--- a/llvm/test/CodeGen/AArch64/peephole-sxtw.mir
+++ b/llvm/test/CodeGen/AArch64/peephole-sxtw.mir
@@ -106,9 +106,7 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]].sub_32
- ; CHECK-NEXT: [[ORRWrr:%[0-9]+]]:gpr32 = ORRWrr $wzr, [[COPY1]]
- ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[ORRWrr]], %subreg.sub_32
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr32sp = COPY [[SUBREG_TO_REG]].sub_32
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr32sp = COPY [[COPY]].sub_32
; CHECK-NEXT: [[ADDWri:%[0-9]+]]:gpr32sp = ADDWri [[COPY2]], 1, 0
; CHECK-NEXT: $w0 = COPY [[ADDWri]]
; CHECK-NEXT: RET_ReallyLR implicit $w0
More information about the llvm-commits
mailing list