[llvm] [AArch64][Peephole] Remove redundant mask for `ldrb + and 0xff` (PR #189518)

via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 30 20:51:09 PDT 2026


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-aarch64

Author: Henry Jiang (mustartt)

<details>
<summary>Changes</summary>

This patch removes the redundant mask in `ldrb + and 0xff`, and likewise for the other zero-extending load widths (e.g. `ldrh + and 0xffff`). Since `ldrb wN, ptr` already zero-extends into `wN`, the mask is unnecessary and can be eliminated by the peephole pass. This is meant to complement the fold already present in DAGCombiner, but it also works across basic blocks.



---
Full diff: https://github.com/llvm/llvm-project/pull/189518.diff


2 Files Affected:

- (modified) llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp (+51) 
- (added) llvm/test/CodeGen/AArch64/peephole-redundant-and-mask.mir (+282) 


``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
index 900f22abcf8b0..8e9959f4972e4 100644
--- a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
+++ b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
@@ -122,6 +122,8 @@ class AArch64MIPeepholeOptImpl {
   bool checkMovImmInstr(MachineInstr &MI, MachineInstr *&MovMI,
                         MachineInstr *&SubregToRegMI);
 
+  bool removeRedundantAndMask(MachineInstr &MI);
+
   template <typename T>
   bool visitADDSUB(unsigned PosOpc, unsigned NegOpc, MachineInstr &MI);
   template <typename T>
@@ -652,6 +654,51 @@ bool AArch64MIPeepholeOptImpl::splitTwoPartImm(MachineInstr &MI,
   return true;
 }
 
+// Remove AND[W|X]ri when the mask is redundant because the source
+// operand's width already guarantees the upper bits are zero.
+//   %1:gpr32common = LDRBBui %0, 0
+//   %2:gpr32common = ANDWri %1, #0xff
+// All uses of %2 are replaced by %1, since the load is already zero extending.
+bool AArch64MIPeepholeOptImpl::removeRedundantAndMask(MachineInstr &MI) {
+  assert((MI.getOpcode() == AArch64::ANDWri ||
+          MI.getOpcode() == AArch64::ANDXri) &&
+         "Unsupported masking instructions");
+
+  unsigned RegSize = MI.getOpcode() == AArch64::ANDWri ? 32 : 64;
+  auto EncodedImm = MI.getOperand(2).getImm();
+  uint64_t Mask = AArch64_AM::decodeLogicalImmediate(EncodedImm, RegSize);
+
+  MachineInstr *SrcMI = MRI->getUniqueVRegDef(MI.getOperand(1).getReg());
+  if (!SrcMI || !SrcMI->hasOneMemOperand())
+    return false;
+
+  const MachineMemOperand *MMO = *SrcMI->memoperands_begin();
+  if (!MMO || !MMO->isLoad())
+    return false;
+
+  if (!AArch64InstrInfo::isZExtLoad(*SrcMI))
+    return false;
+
+  LocationSize Bits = MMO->getSizeInBits();
+  if (!Bits.hasValue() || Bits.isScalable())
+    return false;
+  uint64_t LoadSize = Bits.getValue().getFixedValue();
+  if (Mask != maskTrailingOnes<uint64_t>(LoadSize))
+    return false;
+
+  Register DstReg = MI.getOperand(0).getReg();
+  Register SrcReg = SrcMI->getOperand(0).getReg();
+  if (DstReg.isVirtual()) {
+    MRI->replaceRegWith(DstReg, SrcReg);
+  } else {
+    BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(TargetOpcode::COPY),
+            DstReg)
+        .addReg(SrcReg);
+  }
+  MI.eraseFromParent();
+  return true;
+}
+
 bool AArch64MIPeepholeOptImpl::visitINSviGPR(MachineInstr &MI, unsigned Opc) {
   // Check if this INSvi[X]gpr comes from COPY of a source FPR128
   //
@@ -970,6 +1017,10 @@ bool AArch64MIPeepholeOptImpl::run(MachineFunction &MF) {
         Changed |= trySplitLogicalImm<uint64_t>(
             AArch64::ANDXri, MI, SplitStrategy::Intersect, AArch64::ANDSXri);
         break;
+      case AArch64::ANDXri:
+      case AArch64::ANDWri:
+        Changed |= removeRedundantAndMask(MI);
+        break;
       case AArch64::EORWrr:
         Changed |= trySplitLogicalImm<uint32_t>(AArch64::EORWri, MI,
                                                 SplitStrategy::Disjoint);
diff --git a/llvm/test/CodeGen/AArch64/peephole-redundant-and-mask.mir b/llvm/test/CodeGen/AArch64/peephole-redundant-and-mask.mir
new file mode 100644
index 0000000000000..89b3299d440ba
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/peephole-redundant-and-mask.mir
@@ -0,0 +1,282 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -run-pass=aarch64-mi-peephole-opt -o - -mtriple=aarch64-unknown-linux -verify-machineinstrs %s | FileCheck %s
+---
+name: zext_byte_load_ui
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x0
+    ; CHECK-LABEL: name: zext_byte_load_ui
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
+    ; CHECK-NEXT: [[LDRBBui:%[0-9]+]]:gpr32common = LDRBBui [[COPY]], 0 :: (load (s8))
+    ; CHECK-NEXT: $w0 = COPY [[LDRBBui]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %0:gpr64common = COPY $x0
+    %1:gpr32common = LDRBBui %0, 0 :: (load (s8))
+    %2:gpr32common = ANDWri %1, 7
+    $w0 = COPY %2
+    RET_ReallyLR implicit $w0
+...
+---
+name: zext_byte_load_ur
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x0
+    ; CHECK-LABEL: name: zext_byte_load_ur
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
+    ; CHECK-NEXT: [[LDURBBi:%[0-9]+]]:gpr32common = LDURBBi [[COPY]], 1 :: (load (s8))
+    ; CHECK-NEXT: $w0 = COPY [[LDURBBi]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %0:gpr64common = COPY $x0
+    %1:gpr32common = LDURBBi %0, 1 :: (load (s8))
+    %2:gpr32common = ANDWri %1, 7
+    $w0 = COPY %2
+    RET_ReallyLR implicit $w0
+...
+---
+name: zext_byte_load_roX
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x0, $x1
+    ; CHECK-LABEL: name: zext_byte_load_roX
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+    ; CHECK-NEXT: [[LDRBBroX:%[0-9]+]]:gpr32common = LDRBBroX [[COPY]], [[COPY1]], 0, 0 :: (load (s8))
+    ; CHECK-NEXT: $w0 = COPY [[LDRBBroX]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %0:gpr64common = COPY $x0
+    %1:gpr64 = COPY $x1
+    %2:gpr32common = LDRBBroX %0, %1, 0, 0 :: (load (s8))
+    %3:gpr32common = ANDWri %2, 7
+    $w0 = COPY %3
+    RET_ReallyLR implicit $w0
+...
+---
+name: zext_byte_load_roW
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x0, $w1
+    ; CHECK-LABEL: name: zext_byte_load_roW
+    ; CHECK: liveins: $x0, $w1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
+    ; CHECK-NEXT: [[LDRBBroW:%[0-9]+]]:gpr32common = LDRBBroW [[COPY]], [[COPY1]], 0, 0 :: (load (s8))
+    ; CHECK-NEXT: $w0 = COPY [[LDRBBroW]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %0:gpr64common = COPY $x0
+    %1:gpr32 = COPY $w1
+    %2:gpr32common = LDRBBroW %0, %1, 0, 0 :: (load (s8))
+    %3:gpr32common = ANDWri %2, 7
+    $w0 = COPY %3
+    RET_ReallyLR implicit $w0
+...
+---
+name: zext_half_load_ui
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x0
+    ; CHECK-LABEL: name: zext_half_load_ui
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
+    ; CHECK-NEXT: [[LDRHHui:%[0-9]+]]:gpr32common = LDRHHui [[COPY]], 0 :: (load (s16))
+    ; CHECK-NEXT: $w0 = COPY [[LDRHHui]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %0:gpr64common = COPY $x0
+    %1:gpr32common = LDRHHui %0, 0 :: (load (s16))
+    %2:gpr32common = ANDWri %1, 15
+    $w0 = COPY %2
+    RET_ReallyLR implicit $w0
+...
+---
+name: zext_half_load_ur
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x0
+    ; CHECK-LABEL: name: zext_half_load_ur
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
+    ; CHECK-NEXT: [[LDURHHi:%[0-9]+]]:gpr32common = LDURHHi [[COPY]], 2 :: (load (s16))
+    ; CHECK-NEXT: $w0 = COPY [[LDURHHi]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %0:gpr64common = COPY $x0
+    %1:gpr32common = LDURHHi %0, 2 :: (load (s16))
+    %2:gpr32common = ANDWri %1, 15
+    $w0 = COPY %2
+    RET_ReallyLR implicit $w0
+...
+---
+name: zext_half_load_roX
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x0, $x1
+    ; CHECK-LABEL: name: zext_half_load_roX
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+    ; CHECK-NEXT: [[LDRHHroX:%[0-9]+]]:gpr32common = LDRHHroX [[COPY]], [[COPY1]], 0, 0 :: (load (s16))
+    ; CHECK-NEXT: $w0 = COPY [[LDRHHroX]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %0:gpr64common = COPY $x0
+    %1:gpr64 = COPY $x1
+    %2:gpr32common = LDRHHroX %0, %1, 0, 0 :: (load (s16))
+    %3:gpr32common = ANDWri %2, 15
+    $w0 = COPY %3
+    RET_ReallyLR implicit $w0
+...
+---
+name: zext_half_load_roW
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x0, $w1
+    ; CHECK-LABEL: name: zext_half_load_roW
+    ; CHECK: liveins: $x0, $w1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
+    ; CHECK-NEXT: [[LDRHHroW:%[0-9]+]]:gpr32common = LDRHHroW [[COPY]], [[COPY1]], 0, 0 :: (load (s16))
+    ; CHECK-NEXT: $w0 = COPY [[LDRHHroW]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %0:gpr64common = COPY $x0
+    %1:gpr32 = COPY $w1
+    %2:gpr32common = LDRHHroW %0, %1, 0, 0 :: (load (s16))
+    %3:gpr32common = ANDWri %2, 15
+    $w0 = COPY %3
+    RET_ReallyLR implicit $w0
+...
+---
+name: sext_byte_load_not_removed
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x0
+    ; CHECK-LABEL: name: sext_byte_load_not_removed
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
+    ; CHECK-NEXT: [[LDRSBWui:%[0-9]+]]:gpr32common = LDRSBWui [[COPY]], 0 :: (load (s8))
+    ; CHECK-NEXT: [[ANDWri:%[0-9]+]]:gpr32common = ANDWri [[LDRSBWui]], 7
+    ; CHECK-NEXT: $w0 = COPY [[ANDWri]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %0:gpr64common = COPY $x0
+    %1:gpr32common = LDRSBWui %0, 0 :: (load (s8))
+    %2:gpr32common = ANDWri %1, 7
+    $w0 = COPY %2
+    RET_ReallyLR implicit $w0
+...
+---
+name: sext_half_load_not_removed
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x0
+    ; CHECK-LABEL: name: sext_half_load_not_removed
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
+    ; CHECK-NEXT: [[LDRSHWui:%[0-9]+]]:gpr32common = LDRSHWui [[COPY]], 0 :: (load (s16))
+    ; CHECK-NEXT: [[ANDWri:%[0-9]+]]:gpr32common = ANDWri [[LDRSHWui]], 15
+    ; CHECK-NEXT: $w0 = COPY [[ANDWri]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %0:gpr64common = COPY $x0
+    %1:gpr32common = LDRSHWui %0, 0 :: (load (s16))
+    %2:gpr32common = ANDWri %1, 15
+    $w0 = COPY %2
+    RET_ReallyLR implicit $w0
+...
+---
+name: wrong_mask_not_removed
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x0
+    ; CHECK-LABEL: name: wrong_mask_not_removed
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
+    ; CHECK-NEXT: [[LDRBBui:%[0-9]+]]:gpr32common = LDRBBui [[COPY]], 0 :: (load (s8))
+    ; CHECK-NEXT: [[ANDWri:%[0-9]+]]:gpr32common = ANDWri [[LDRBBui]], 15
+    ; CHECK-NEXT: $w0 = COPY [[ANDWri]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %0:gpr64common = COPY $x0
+    %1:gpr32common = LDRBBui %0, 0 :: (load (s8))
+    %2:gpr32common = ANDWri %1, 15
+    $w0 = COPY %2
+    RET_ReallyLR implicit $w0
+...
+---
+name: small_mask_not_removed
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x0
+    ; CHECK-LABEL: name: small_mask_not_removed
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
+    ; CHECK-NEXT: [[LDRBBui:%[0-9]+]]:gpr32common = LDRBBui [[COPY]], 0 :: (load (s8))
+    ; CHECK-NEXT: [[ANDWri:%[0-9]+]]:gpr32common = ANDWri [[LDRBBui]], 3
+    ; CHECK-NEXT: $w0 = COPY [[ANDWri]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %0:gpr64common = COPY $x0
+    %1:gpr32common = LDRBBui %0, 0 :: (load (s8))
+    %2:gpr32common = ANDWri %1, 3
+    $w0 = COPY %2
+    RET_ReallyLR implicit $w0
+...
+---
+name: non_load_not_removed
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $w0, $w1
+    ; CHECK-LABEL: name: non_load_not_removed
+    ; CHECK: liveins: $w0, $w1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
+    ; CHECK-NEXT: [[ADDWrr:%[0-9]+]]:gpr32common = ADDWrr [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: [[ANDWri:%[0-9]+]]:gpr32common = ANDWri [[ADDWrr]], 7
+    ; CHECK-NEXT: $w0 = COPY [[ANDWri]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %0:gpr32 = COPY $w0
+    %1:gpr32 = COPY $w1
+    %2:gpr32common = ADDWrr %0, %1
+    %3:gpr32common = ANDWri %2, 7
+    $w0 = COPY %3
+    RET_ReallyLR implicit $w0
+...
+---
+name: physreg_dest_not_removed
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x0
+    ; CHECK-LABEL: name: physreg_dest_not_removed
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
+    ; CHECK-NEXT: [[LDRHHui:%[0-9]+]]:gpr32 = LDRHHui [[COPY]], 0 :: (load (s16))
+    ; CHECK-NEXT: $w2 = COPY [[LDRHHui]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $w2
+    %0:gpr64common = COPY $x0
+    %1:gpr32 = LDRHHui %0, 0 :: (load (s16))
+    $w2 = ANDWri %1, 15
+    RET_ReallyLR implicit $w2
+...

``````````

</details>


https://github.com/llvm/llvm-project/pull/189518


More information about the llvm-commits mailing list