[llvm] [AArch64][Peephole] Remove redundant mask for `ldrb + and 0xff` (PR #189518)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 30 20:51:09 PDT 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: Henry Jiang (mustartt)
<details>
<summary>Changes</summary>
This patch removes the redundant mask in sequences such as `ldrb + and 0xff`, along with the equivalent masks for other load widths. Since `ldrb wN, ptr` already zero-extends into `wN`, the mask is unnecessary and can be eliminated by the peephole pass. This is meant to complement the fold already present in DAGCombiner, but across basic blocks.
---
Full diff: https://github.com/llvm/llvm-project/pull/189518.diff
2 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp (+51)
- (added) llvm/test/CodeGen/AArch64/peephole-redundant-and-mask.mir (+282)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
index 900f22abcf8b0..8e9959f4972e4 100644
--- a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
+++ b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
@@ -122,6 +122,8 @@ class AArch64MIPeepholeOptImpl {
bool checkMovImmInstr(MachineInstr &MI, MachineInstr *&MovMI,
MachineInstr *&SubregToRegMI);
+ bool removeRedundantAndMask(MachineInstr &MI);
+
template <typename T>
bool visitADDSUB(unsigned PosOpc, unsigned NegOpc, MachineInstr &MI);
template <typename T>
@@ -652,6 +654,51 @@ bool AArch64MIPeepholeOptImpl::splitTwoPartImm(MachineInstr &MI,
return true;
}
+// Remove an AND[W|X]ri whose source is defined by a zero-extending load and
+// whose mask is exactly the low <load width> bits, making the AND a no-op:
+// %1:gpr32common = LDRBBui %0, 0
+// %2:gpr32common = ANDWri %1, #0xff
+// Uses of %2 are rewritten to %1 and the AND is erased; returns true if so.
+bool AArch64MIPeepholeOptImpl::removeRedundantAndMask(MachineInstr &MI) {
+ assert((MI.getOpcode() == AArch64::ANDWri ||
+ MI.getOpcode() == AArch64::ANDXri) &&
+ "Unsupported masking instructions");
+
+ unsigned RegSize = MI.getOpcode() == AArch64::ANDWri ? 32 : 64;
+ auto EncodedImm = MI.getOperand(2).getImm();
+ uint64_t Mask = AArch64_AM::decodeLogicalImmediate(EncodedImm, RegSize);
+
+ MachineInstr *SrcMI = MRI->getUniqueVRegDef(MI.getOperand(1).getReg());
+ if (!SrcMI || !SrcMI->hasOneMemOperand())
+ return false; // Need a unique def carrying exactly one memory operand.
+
+ const MachineMemOperand *MMO = *SrcMI->memoperands_begin();
+ if (!MMO || !MMO->isLoad())
+ return false; // The defining instruction must be a load.
+
+ if (!AArch64InstrInfo::isZExtLoad(*SrcMI))
+ return false; // Other loads (e.g. sign-extending) may set the upper bits.
+
+ LocationSize Bits = MMO->getSizeInBits();
+ if (!Bits.hasValue() || Bits.isScalable())
+ return false; // The loaded width must be a known, fixed number of bits.
+ uint64_t LoadSize = Bits.getValue().getFixedValue();
+ if (Mask != maskTrailingOnes<uint64_t>(LoadSize))
+ return false; // Only an exact low-LoadSize-bit mask is accepted here.
+
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = SrcMI->getOperand(0).getReg();
+ if (DstReg.isVirtual()) { // Virtual dest: forward the load result to users.
+ MRI->replaceRegWith(DstReg, SrcReg); // NOTE(review): reg class unchecked.
+ } else { // Non-virtual dest: keep the definition alive as a plain COPY.
+ BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(TargetOpcode::COPY),
+ DstReg)
+ .addReg(SrcReg);
+ }
+ MI.eraseFromParent();
+ return true;
+}
+
bool AArch64MIPeepholeOptImpl::visitINSviGPR(MachineInstr &MI, unsigned Opc) {
// Check if this INSvi[X]gpr comes from COPY of a source FPR128
//
@@ -970,6 +1017,10 @@ bool AArch64MIPeepholeOptImpl::run(MachineFunction &MF) {
Changed |= trySplitLogicalImm<uint64_t>(
AArch64::ANDXri, MI, SplitStrategy::Intersect, AArch64::ANDSXri);
break;
+ case AArch64::ANDXri:
+ case AArch64::ANDWri:
+ Changed |= removeRedundantAndMask(MI);
+ break;
case AArch64::EORWrr:
Changed |= trySplitLogicalImm<uint32_t>(AArch64::EORWri, MI,
SplitStrategy::Disjoint);
diff --git a/llvm/test/CodeGen/AArch64/peephole-redundant-and-mask.mir b/llvm/test/CodeGen/AArch64/peephole-redundant-and-mask.mir
new file mode 100644
index 0000000000000..89b3299d440ba
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/peephole-redundant-and-mask.mir
@@ -0,0 +1,282 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -run-pass=aarch64-mi-peephole-opt -o - -mtriple=aarch64-unknown-linux -verify-machineinstrs %s | FileCheck %s
+---
+name: zext_byte_load_ui
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0
+ ; CHECK-LABEL: name: zext_byte_load_ui
+ ; CHECK: liveins: $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
+ ; CHECK-NEXT: [[LDRBBui:%[0-9]+]]:gpr32common = LDRBBui [[COPY]], 0 :: (load (s8))
+ ; CHECK-NEXT: $w0 = COPY [[LDRBBui]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %0:gpr64common = COPY $x0
+ %1:gpr32common = LDRBBui %0, 0 :: (load (s8))
+ %2:gpr32common = ANDWri %1, 7
+ $w0 = COPY %2
+ RET_ReallyLR implicit $w0
+...
+---
+name: zext_byte_load_ur
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0
+ ; CHECK-LABEL: name: zext_byte_load_ur
+ ; CHECK: liveins: $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
+ ; CHECK-NEXT: [[LDURBBi:%[0-9]+]]:gpr32common = LDURBBi [[COPY]], 1 :: (load (s8))
+ ; CHECK-NEXT: $w0 = COPY [[LDURBBi]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %0:gpr64common = COPY $x0
+ %1:gpr32common = LDURBBi %0, 1 :: (load (s8))
+ %2:gpr32common = ANDWri %1, 7
+ $w0 = COPY %2
+ RET_ReallyLR implicit $w0
+...
+---
+name: zext_byte_load_roX
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0, $x1
+ ; CHECK-LABEL: name: zext_byte_load_roX
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+ ; CHECK-NEXT: [[LDRBBroX:%[0-9]+]]:gpr32common = LDRBBroX [[COPY]], [[COPY1]], 0, 0 :: (load (s8))
+ ; CHECK-NEXT: $w0 = COPY [[LDRBBroX]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %0:gpr64common = COPY $x0
+ %1:gpr64 = COPY $x1
+ %2:gpr32common = LDRBBroX %0, %1, 0, 0 :: (load (s8))
+ %3:gpr32common = ANDWri %2, 7
+ $w0 = COPY %3
+ RET_ReallyLR implicit $w0
+...
+---
+name: zext_byte_load_roW
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0, $w1
+ ; CHECK-LABEL: name: zext_byte_load_roW
+ ; CHECK: liveins: $x0, $w1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
+ ; CHECK-NEXT: [[LDRBBroW:%[0-9]+]]:gpr32common = LDRBBroW [[COPY]], [[COPY1]], 0, 0 :: (load (s8))
+ ; CHECK-NEXT: $w0 = COPY [[LDRBBroW]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %0:gpr64common = COPY $x0
+ %1:gpr32 = COPY $w1
+ %2:gpr32common = LDRBBroW %0, %1, 0, 0 :: (load (s8))
+ %3:gpr32common = ANDWri %2, 7
+ $w0 = COPY %3
+ RET_ReallyLR implicit $w0
+...
+---
+name: zext_half_load_ui
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0
+ ; CHECK-LABEL: name: zext_half_load_ui
+ ; CHECK: liveins: $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
+ ; CHECK-NEXT: [[LDRHHui:%[0-9]+]]:gpr32common = LDRHHui [[COPY]], 0 :: (load (s16))
+ ; CHECK-NEXT: $w0 = COPY [[LDRHHui]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %0:gpr64common = COPY $x0
+ %1:gpr32common = LDRHHui %0, 0 :: (load (s16))
+ %2:gpr32common = ANDWri %1, 15
+ $w0 = COPY %2
+ RET_ReallyLR implicit $w0
+...
+---
+name: zext_half_load_ur
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0
+ ; CHECK-LABEL: name: zext_half_load_ur
+ ; CHECK: liveins: $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
+ ; CHECK-NEXT: [[LDURHHi:%[0-9]+]]:gpr32common = LDURHHi [[COPY]], 2 :: (load (s16))
+ ; CHECK-NEXT: $w0 = COPY [[LDURHHi]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %0:gpr64common = COPY $x0
+ %1:gpr32common = LDURHHi %0, 2 :: (load (s16))
+ %2:gpr32common = ANDWri %1, 15
+ $w0 = COPY %2
+ RET_ReallyLR implicit $w0
+...
+---
+name: zext_half_load_roX
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0, $x1
+ ; CHECK-LABEL: name: zext_half_load_roX
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+ ; CHECK-NEXT: [[LDRHHroX:%[0-9]+]]:gpr32common = LDRHHroX [[COPY]], [[COPY1]], 0, 0 :: (load (s16))
+ ; CHECK-NEXT: $w0 = COPY [[LDRHHroX]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %0:gpr64common = COPY $x0
+ %1:gpr64 = COPY $x1
+ %2:gpr32common = LDRHHroX %0, %1, 0, 0 :: (load (s16))
+ %3:gpr32common = ANDWri %2, 15
+ $w0 = COPY %3
+ RET_ReallyLR implicit $w0
+...
+---
+name: zext_half_load_roW
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0, $w1
+ ; CHECK-LABEL: name: zext_half_load_roW
+ ; CHECK: liveins: $x0, $w1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
+ ; CHECK-NEXT: [[LDRHHroW:%[0-9]+]]:gpr32common = LDRHHroW [[COPY]], [[COPY1]], 0, 0 :: (load (s16))
+ ; CHECK-NEXT: $w0 = COPY [[LDRHHroW]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %0:gpr64common = COPY $x0
+ %1:gpr32 = COPY $w1
+ %2:gpr32common = LDRHHroW %0, %1, 0, 0 :: (load (s16))
+ %3:gpr32common = ANDWri %2, 15
+ $w0 = COPY %3
+ RET_ReallyLR implicit $w0
+...
+---
+name: sext_byte_load_not_removed
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0
+ ; CHECK-LABEL: name: sext_byte_load_not_removed
+ ; CHECK: liveins: $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
+ ; CHECK-NEXT: [[LDRSBWui:%[0-9]+]]:gpr32common = LDRSBWui [[COPY]], 0 :: (load (s8))
+ ; CHECK-NEXT: [[ANDWri:%[0-9]+]]:gpr32common = ANDWri [[LDRSBWui]], 7
+ ; CHECK-NEXT: $w0 = COPY [[ANDWri]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %0:gpr64common = COPY $x0
+ %1:gpr32common = LDRSBWui %0, 0 :: (load (s8))
+ %2:gpr32common = ANDWri %1, 7
+ $w0 = COPY %2
+ RET_ReallyLR implicit $w0
+...
+---
+name: sext_half_load_not_removed
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0
+ ; CHECK-LABEL: name: sext_half_load_not_removed
+ ; CHECK: liveins: $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
+ ; CHECK-NEXT: [[LDRSHWui:%[0-9]+]]:gpr32common = LDRSHWui [[COPY]], 0 :: (load (s16))
+ ; CHECK-NEXT: [[ANDWri:%[0-9]+]]:gpr32common = ANDWri [[LDRSHWui]], 15
+ ; CHECK-NEXT: $w0 = COPY [[ANDWri]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %0:gpr64common = COPY $x0
+ %1:gpr32common = LDRSHWui %0, 0 :: (load (s16))
+ %2:gpr32common = ANDWri %1, 15
+ $w0 = COPY %2
+ RET_ReallyLR implicit $w0
+...
+---
+name: wrong_mask_not_removed
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0
+ ; CHECK-LABEL: name: wrong_mask_not_removed
+ ; CHECK: liveins: $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
+ ; CHECK-NEXT: [[LDRBBui:%[0-9]+]]:gpr32common = LDRBBui [[COPY]], 0 :: (load (s8))
+ ; CHECK-NEXT: [[ANDWri:%[0-9]+]]:gpr32common = ANDWri [[LDRBBui]], 15
+ ; CHECK-NEXT: $w0 = COPY [[ANDWri]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %0:gpr64common = COPY $x0
+ %1:gpr32common = LDRBBui %0, 0 :: (load (s8))
+ %2:gpr32common = ANDWri %1, 15
+ $w0 = COPY %2
+ RET_ReallyLR implicit $w0
+...
+---
+name: small_mask_not_removed
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0
+ ; CHECK-LABEL: name: small_mask_not_removed
+ ; CHECK: liveins: $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
+ ; CHECK-NEXT: [[LDRBBui:%[0-9]+]]:gpr32common = LDRBBui [[COPY]], 0 :: (load (s8))
+ ; CHECK-NEXT: [[ANDWri:%[0-9]+]]:gpr32common = ANDWri [[LDRBBui]], 3
+ ; CHECK-NEXT: $w0 = COPY [[ANDWri]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %0:gpr64common = COPY $x0
+ %1:gpr32common = LDRBBui %0, 0 :: (load (s8))
+ %2:gpr32common = ANDWri %1, 3
+ $w0 = COPY %2
+ RET_ReallyLR implicit $w0
+...
+---
+name: non_load_not_removed
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $w0, $w1
+ ; CHECK-LABEL: name: non_load_not_removed
+ ; CHECK: liveins: $w0, $w1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
+ ; CHECK-NEXT: [[ADDWrr:%[0-9]+]]:gpr32common = ADDWrr [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: [[ANDWri:%[0-9]+]]:gpr32common = ANDWri [[ADDWrr]], 7
+ ; CHECK-NEXT: $w0 = COPY [[ANDWri]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %0:gpr32 = COPY $w0
+ %1:gpr32 = COPY $w1
+ %2:gpr32common = ADDWrr %0, %1
+ %3:gpr32common = ANDWri %2, 7
+ $w0 = COPY %3
+ RET_ReallyLR implicit $w0
+...
+---
+name: physreg_dest_not_removed
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0
+ ; CHECK-LABEL: name: physreg_dest_not_removed
+ ; CHECK: liveins: $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
+ ; CHECK-NEXT: [[LDRHHui:%[0-9]+]]:gpr32 = LDRHHui [[COPY]], 0 :: (load (s16))
+ ; CHECK-NEXT: $w2 = COPY [[LDRHHui]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $w2
+ %0:gpr64common = COPY $x0
+ %1:gpr32 = LDRHHui %0, 0 :: (load (s16))
+ $w2 = ANDWri %1, 15
+ RET_ReallyLR implicit $w2
+...
``````````
</details>
https://github.com/llvm/llvm-project/pull/189518
More information about the llvm-commits
mailing list