[llvm] [AArch64] Replace AND with LSL#2 for LDR target (#34101) (PR #89531)
via llvm-commits
llvm-commits at lists.llvm.org
Sat Apr 20 22:31:54 PDT 2024
https://github.com/ParkHanbum created https://github.com/llvm/llvm-project/pull/89531
Currently, the `DAGCombiner` replaces certain bitwise shift sequences (`LSR`/`LSL`) with an `AND`.
However, in some cases the `AND` generated by this process can itself be removed.
Consider the following case:
```
lsr x8, x8, #56
and x8, x8, #0xfc
ldr w0, [x2, x8]
ret
```
In this case, we can remove the `AND` by changing the addressing mode of the `LDR`
to `[X2, X8, LSL #2]` and increasing the right-shift amount from 56 to 58.
After the change:
```
lsr x8, x8, #58
ldr w0, [x2, x8, lsl #2]
ret
```
This patch checks whether such a shift + `AND` combination feeding a load address
can be folded into the load's scaled addressing mode, and performs the transformation when it can.
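To see why the two sequences compute the same address: `(x >> 56) & 0xfc` keeps the top six bits of `x` in bit positions 7..2, which is exactly `(x >> 58) << 2`; the `LSL #2` of the scaled addressing mode supplies that final shift (`0xfc` is the mask the test's `ANDXri ... 8069` logical immediate decodes to). A minimal standalone sketch of the equivalence, with made-up helper names, illustrative only and not part of the patch:
```
#include <cassert>
#include <cstdint>

// Address computed by the original sequence:
//   lsr x8, x8, #56 ; and x8, x8, #0xfc ; ldr w0, [x2, x8]
static uint64_t addrBefore(uint64_t Base, uint64_t X) {
  return Base + ((X >> 56) & 0xfc);
}

// Address computed after the transformation:
//   lsr x8, x8, #58 ; ldr w0, [x2, x8, lsl #2]
static uint64_t addrAfter(uint64_t Base, uint64_t X) {
  return Base + ((X >> 58) << 2);
}

int main() {
  // Only the top byte of X influences the index, so check it exhaustively.
  for (uint64_t Hi = 0; Hi < 256; ++Hi) {
    uint64_t X = (Hi << 56) | 0x123456789abcULL;
    assert(addrBefore(0x1000, X) == addrAfter(0x1000, X));
  }
  return 0;
}
```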
From ee428cfa7b40deb4fa42056ad385a1b8b4b486a8 Mon Sep 17 00:00:00 2001
From: Hanbum Park <kese111 at gmail.com>
Date: Sun, 21 Apr 2024 14:28:50 +0900
Subject: [PATCH 1/2] [AArch64] Add test for comparing
---
llvm/test/CodeGen/AArch64/peephole-load.mir | 162 ++++++++++++++++++++
1 file changed, 162 insertions(+)
create mode 100644 llvm/test/CodeGen/AArch64/peephole-load.mir
diff --git a/llvm/test/CodeGen/AArch64/peephole-load.mir b/llvm/test/CodeGen/AArch64/peephole-load.mir
new file mode 100644
index 00000000000000..df1c9a3977e79f
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/peephole-load.mir
@@ -0,0 +1,162 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -run-pass=aarch64-mi-peephole-opt -o - -mtriple=aarch64-unknown-linux -verify-machineinstrs %s | FileCheck %s
+
+---
+name: transform_lsr_and_ldr_to_lsr_ldr2
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1, $x2
+ ; CHECK-LABEL: name: transform_lsr_and_ldr_to_lsr_ldr2
+ ; CHECK: liveins: $x0, $x1, $x2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x2
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY $x0
+ ; CHECK-NEXT: [[MADDXrrr:%[0-9]+]]:gpr64 = MADDXrrr [[COPY1]], [[COPY2]], $xzr
+ ; CHECK-NEXT: [[UBFMXri:%[0-9]+]]:gpr64 = UBFMXri killed [[MADDXrrr]], 56, 63
+ ; CHECK-NEXT: [[ANDXri:%[0-9]+]]:gpr64common = ANDXri killed [[UBFMXri]], 8069
+ ; CHECK-NEXT: [[LDRWroX:%[0-9]+]]:gpr32 = LDRWroX [[COPY]], killed [[ANDXri]], 0, 0
+ ; CHECK-NEXT: $w0 = COPY [[LDRWroX]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %2:gpr64common = COPY $x2
+ %1:gpr64 = COPY $x1
+ %0:gpr64 = COPY $x0
+ %3:gpr64 = MADDXrrr %1, %0, $xzr
+ %4:gpr64 = UBFMXri killed %3, 56, 63
+ %5:gpr64common = ANDXri killed %4, 8069
+ %6:gpr32 = LDRWroX %2, killed %5, 0, 0
+ $w0 = COPY %6
+ RET_ReallyLR implicit $w0
+...
+---
+name: transform_lsl1_and_ldr_to_lsr1_ldr2
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1, $x2
+ ; CHECK-LABEL: name: transform_lsl1_and_ldr_to_lsr1_ldr2
+ ; CHECK: liveins: $x0, $x1, $x2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x2
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY $x0
+ ; CHECK-NEXT: [[MADDXrrr:%[0-9]+]]:gpr64 = MADDXrrr [[COPY1]], [[COPY2]], $xzr
+ ; CHECK-NEXT: [[UBFMXri:%[0-9]+]]:gpr64 = UBFMXri killed [[MADDXrrr]], 63, 62
+ ; CHECK-NEXT: [[ANDXri:%[0-9]+]]:gpr64common = ANDXri killed [[UBFMXri]], 8125
+ ; CHECK-NEXT: [[LDRWroX:%[0-9]+]]:gpr32 = LDRWroX [[COPY]], killed [[ANDXri]], 0, 0
+ ; CHECK-NEXT: $w0 = COPY [[LDRWroX]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %2:gpr64common = COPY $x2
+ %1:gpr64 = COPY $x1
+ %0:gpr64 = COPY $x0
+ %3:gpr64 = MADDXrrr %1, %0, $xzr
+ %4:gpr64 = UBFMXri killed %3, 63, 62
+ %5:gpr64common = ANDXri killed %4, 8125
+ %6:gpr32 = LDRWroX %2, killed %5, 0, 0
+ $w0 = COPY %6
+ RET_ReallyLR implicit $w0
+...
+---
+name: donot_transform_and_ldr
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1, $x2
+ ; CHECK-LABEL: name: donot_transform_and_ldr
+ ; CHECK: liveins: $x0, $x1, $x2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x2
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY $x0
+ ; CHECK-NEXT: [[MADDXrrr:%[0-9]+]]:gpr64 = MADDXrrr [[COPY1]], [[COPY2]], $xzr
+ ; CHECK-NEXT: [[ANDXri:%[0-9]+]]:gpr64common = ANDXri killed [[MADDXrrr]], 8125
+ ; CHECK-NEXT: [[LDRWroX:%[0-9]+]]:gpr32 = LDRWroX [[COPY]], killed [[ANDXri]], 0, 0
+ ; CHECK-NEXT: $w0 = COPY [[LDRWroX]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %2:gpr64common = COPY $x2
+ %1:gpr64 = COPY $x1
+ %0:gpr64 = COPY $x0
+ %3:gpr64 = MADDXrrr %1, %0, $xzr
+ %4:gpr64common = ANDXri killed %3, 8125
+ %5:gpr32 = LDRWroX %2, killed %4, 0, 0
+ $w0 = COPY %5
+ RET_ReallyLR implicit $w0
+...
+---
+name: donot_transform_if_not_lsl
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1, $x2
+ ; CHECK-LABEL: name: donot_transform_if_not_lsl
+ ; CHECK: liveins: $x0, $x1, $x2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x2
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY $x0
+ ; CHECK-NEXT: [[MADDXrrr:%[0-9]+]]:gpr64 = MADDXrrr [[COPY1]], [[COPY2]], $xzr
+ ; CHECK-NEXT: [[UBFMXri:%[0-9]+]]:gpr64 = UBFMXri killed [[MADDXrrr]], 64, 62
+ ; CHECK-NEXT: [[ANDXri:%[0-9]+]]:gpr64common = ANDXri killed [[UBFMXri]], 8125
+ ; CHECK-NEXT: [[LDRWroX:%[0-9]+]]:gpr32 = LDRWroX [[COPY]], killed [[ANDXri]], 0, 0
+ ; CHECK-NEXT: $w0 = COPY [[LDRWroX]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %2:gpr64common = COPY $x2
+ %1:gpr64 = COPY $x1
+ %0:gpr64 = COPY $x0
+ %3:gpr64 = MADDXrrr %1, %0, $xzr
+ %4:gpr64 = UBFMXri killed %3, 64, 62
+ %5:gpr64common = ANDXri killed %4, 8125
+ %6:gpr32 = LDRWroX %2, killed %5, 0, 0
+ $w0 = COPY %6
+ RET_ReallyLR implicit $w0
+...
+---
+name: donot_transform_if_not_lsr
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x0, $x1, $x2
+ ; CHECK-LABEL: name: donot_transform_if_not_lsr
+ ; CHECK: liveins: $x0, $x1, $x2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x2
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY $x0
+ ; CHECK-NEXT: [[MADDXrrr:%[0-9]+]]:gpr64 = MADDXrrr [[COPY1]], [[COPY2]], $xzr
+ ; CHECK-NEXT: [[UBFMXri:%[0-9]+]]:gpr64 = UBFMXri killed [[MADDXrrr]], 62, 62
+ ; CHECK-NEXT: [[ANDXri:%[0-9]+]]:gpr64common = ANDXri killed [[UBFMXri]], 8069
+ ; CHECK-NEXT: [[LDRWroX:%[0-9]+]]:gpr32 = LDRWroX [[COPY]], killed [[ANDXri]], 0, 0
+ ; CHECK-NEXT: $w0 = COPY [[LDRWroX]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %2:gpr64common = COPY $x2
+ %1:gpr64 = COPY $x1
+ %0:gpr64 = COPY $x0
+ %3:gpr64 = MADDXrrr %1, %0, $xzr
+ %4:gpr64 = UBFMXri killed %3, 62, 62
+ %5:gpr64common = ANDXri killed %4, 8069
+ %6:gpr32 = LDRWroX %2, killed %5, 0, 0
+ $w0 = COPY %6
+ RET_ReallyLR implicit $w0
+...
+---
+name: donot_transform_if_not_exist_and_and_lsl
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $x0, $x2
+ ; CHECK-LABEL: name: donot_transform_if_not_exist_and_and_lsl
+ ; CHECK: liveins: $x0, $x2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x2
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x0
+ ; CHECK-NEXT: [[UBFMXri:%[0-9]+]]:gpr64 = UBFMXri [[COPY1]], 61, 60
+ ; CHECK-NEXT: [[LDRWroX:%[0-9]+]]:gpr32 = LDRWroX [[COPY]], killed [[UBFMXri]], 0, 0
+ ; CHECK-NEXT: $w0 = COPY [[LDRWroX]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %2:gpr64common = COPY $x2
+ %0:gpr64 = COPY $x0
+ %3:gpr64 = UBFMXri %0, 61, 60
+ %4:gpr32 = LDRWroX %2, killed %3, 0, 0
+ $w0 = COPY %4
+ RET_ReallyLR implicit $w0
From feebf129a4c8e3296e82f11d758181985b5a59c8 Mon Sep 17 00:00:00 2001
From: Hanbum Park <kese111 at gmail.com>
Date: Sun, 21 Apr 2024 14:29:41 +0900
Subject: [PATCH 2/2] [AArch64] Replace AND with LSL#2 for LDR target (#34101)
Currently, the `DAGCombiner` replaces certain bitwise shift sequences (`LSR`/`LSL`) with an `AND`.
However, in some cases the `AND` generated by this process can itself be removed.
Consider the following case:
```
lsr x8, x8, #56
and x8, x8, #0xfc
ldr w0, [x2, x8]
ret
```
In this case, we can remove the `AND` by changing the addressing mode of the `LDR`
to `[X2, X8, LSL #2]` and increasing the right-shift amount from 56 to 58.
After the change:
```
lsr x8, x8, #58
ldr w0, [x2, x8, lsl #2]
ret
```
This patch checks whether such a shift + `AND` combination feeding a load address
can be folded into the load's scaled addressing mode, and performs the transformation when it can.
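The second test exercises the `LSL #1` form: there the index is `(x << 1) & ~3`, which equals `(x >> 1) << 2` (the `ANDXri ... 8125` logical immediate decodes to `0xfffffffffffffffc`), so that `AND` is likewise absorbed by the scaled addressing mode. A small illustrative sketch, again not part of the patch:
```
#include <cassert>
#include <cstdint>

int main() {
  // LSL #1 followed by AND with ~3 gives the same index as LSR #1
  // followed by the load's LSL #2 scaling.
  for (uint64_t X : {0x0ULL, 0x1ULL, 0xdeadbeefULL, ~0ULL, 1ULL << 63}) {
    uint64_t Before = (X << 1) & ~3ULL; // lsl x8, x8, #1 ; and x8, x8, #~3
    uint64_t After = (X >> 1) << 2;     // lsr x8, x8, #1 ; [x2, x8, lsl #2]
    assert(Before == After);
  }
  return 0;
}
```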
---
.../Target/AArch64/AArch64MIPeepholeOpt.cpp | 62 +++++++++++++++++++
llvm/test/CodeGen/AArch64/peephole-load.mir | 10 ++-
2 files changed, 66 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
index 22da7ddef98a2a..1c331c88042317 100644
--- a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
+++ b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
@@ -128,6 +128,7 @@ struct AArch64MIPeepholeOpt : public MachineFunctionPass {
bool visitINSviGPR(MachineInstr &MI, unsigned Opc);
bool visitINSvi64lane(MachineInstr &MI);
bool visitFMOVDr(MachineInstr &MI);
+ bool visitLOAD(MachineInstr &MI);
bool runOnMachineFunction(MachineFunction &MF) override;
StringRef getPassName() const override {
@@ -690,6 +691,64 @@ bool AArch64MIPeepholeOpt::visitFMOVDr(MachineInstr &MI) {
return true;
}
+bool AArch64MIPeepholeOpt::visitLOAD(MachineInstr &MI) {
+ Register LdOp2Reg = MI.getOperand(2).getReg();
+ unsigned RegSize = TRI->getRegSizeInBits(LdOp2Reg, *MRI);
+
+ // Consider:
+ //   (ldr w, [x, (and (ubfm x, immr, imms), C1)])
+ // If the bitmask C1 keeps exactly the bits left by the UBFM shift,
+ // except for the low 2 bits, the AND can be folded into the load's
+ // scaled addressing mode:
+ //   (ldr w, [x, (ubfm x, immr', imms'), lsl #2])
+ {
+ if (!MI.getOperand(4).isImm() || MI.getOperand(4).getImm() != 0)
+ return false;
+
+ MachineInstr *AndMI = MRI->getUniqueVRegDef(LdOp2Reg);
+ if (!AndMI || AndMI->getOpcode() != AArch64::ANDXri ||
+ !AndMI->getOperand(2).isImm())
+ return false;
+
+ uint64_t AndMask = AArch64_AM::decodeLogicalImmediate(
+ AndMI->getOperand(2).getImm(), RegSize);
+ MachineInstr *ShtMI = MRI->getUniqueVRegDef(AndMI->getOperand(1).getReg());
+ uint64_t Mask = 0;
+ if (!ShtMI || ShtMI->getOpcode() != AArch64::UBFMXri)
+ return false;
+ uint64_t Immr = ShtMI->getOperand(2).getImm();
+ uint64_t Imms = ShtMI->getOperand(3).getImm();
+ uint64_t NewImmr = 0;
+ uint64_t NewImms = 0;
+ if (Immr <= Imms) {
+ if (Imms != RegSize - 1)
+ return false;
+ Mask = ((uint64_t)1 << (RegSize - Immr)) - 4;
+ NewImmr = Immr + 2;
+ NewImms = Imms;
+ } else {
+ // We only need to handle the LSL #1 case.
+ if ((Immr - Imms != 1) || Immr != RegSize - 1)
+ return false;
+ Mask = UINT64_MAX - 3;
+ NewImmr = 1;
+ NewImms = Immr;
+ }
+
+ // If the AND mask matches, fold the shift into the load's scaled addressing mode.
+ if (AndMask == Mask) {
+ AndMI->eraseFromParent();
+ ShtMI->getOperand(2).setImm(NewImmr);
+ ShtMI->getOperand(3).setImm(NewImms);
+ MI.getOperand(2).setReg(ShtMI->getOperand(0).getReg());
+ MI.getOperand(4).setImm(1);
+ return true;
+ }
+ }
+
+ return false;
+}
+
bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
@@ -771,6 +830,9 @@ bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
case AArch64::FMOVDr:
Changed |= visitFMOVDr(MI);
break;
+ case AArch64::LDRWroX:
+ Changed |= visitLOAD(MI);
+ break;
}
}
}
diff --git a/llvm/test/CodeGen/AArch64/peephole-load.mir b/llvm/test/CodeGen/AArch64/peephole-load.mir
index df1c9a3977e79f..8c9555a48997b9 100644
--- a/llvm/test/CodeGen/AArch64/peephole-load.mir
+++ b/llvm/test/CodeGen/AArch64/peephole-load.mir
@@ -14,9 +14,8 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY $x0
; CHECK-NEXT: [[MADDXrrr:%[0-9]+]]:gpr64 = MADDXrrr [[COPY1]], [[COPY2]], $xzr
- ; CHECK-NEXT: [[UBFMXri:%[0-9]+]]:gpr64 = UBFMXri killed [[MADDXrrr]], 56, 63
- ; CHECK-NEXT: [[ANDXri:%[0-9]+]]:gpr64common = ANDXri killed [[UBFMXri]], 8069
- ; CHECK-NEXT: [[LDRWroX:%[0-9]+]]:gpr32 = LDRWroX [[COPY]], killed [[ANDXri]], 0, 0
+ ; CHECK-NEXT: [[UBFMXri:%[0-9]+]]:gpr64 = UBFMXri killed [[MADDXrrr]], 58, 63
+ ; CHECK-NEXT: [[LDRWroX:%[0-9]+]]:gpr32 = LDRWroX [[COPY]], killed [[UBFMXri]], 0, 1
; CHECK-NEXT: $w0 = COPY [[LDRWroX]]
; CHECK-NEXT: RET_ReallyLR implicit $w0
%2:gpr64common = COPY $x2
@@ -42,9 +41,8 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY $x0
; CHECK-NEXT: [[MADDXrrr:%[0-9]+]]:gpr64 = MADDXrrr [[COPY1]], [[COPY2]], $xzr
- ; CHECK-NEXT: [[UBFMXri:%[0-9]+]]:gpr64 = UBFMXri killed [[MADDXrrr]], 63, 62
- ; CHECK-NEXT: [[ANDXri:%[0-9]+]]:gpr64common = ANDXri killed [[UBFMXri]], 8125
- ; CHECK-NEXT: [[LDRWroX:%[0-9]+]]:gpr32 = LDRWroX [[COPY]], killed [[ANDXri]], 0, 0
+ ; CHECK-NEXT: [[UBFMXri:%[0-9]+]]:gpr64 = UBFMXri killed [[MADDXrrr]], 1, 63
+ ; CHECK-NEXT: [[LDRWroX:%[0-9]+]]:gpr32 = LDRWroX [[COPY]], killed [[UBFMXri]], 0, 1
; CHECK-NEXT: $w0 = COPY [[LDRWroX]]
; CHECK-NEXT: RET_ReallyLR implicit $w0
%2:gpr64common = COPY $x2