[llvm] [AArch64] Replace AND with LSL#2 for LDR target (#34101) (PR #89531)

via llvm-commits llvm-commits at lists.llvm.org
Fri May 31 03:48:42 PDT 2024


https://github.com/ParkHanbum updated https://github.com/llvm/llvm-project/pull/89531

>From 5251d57da380593227f1c6dd1088911f06145d65 Mon Sep 17 00:00:00 2001
From: hanbeom <kese111 at gmail.com>
Date: Fri, 31 May 2024 18:55:16 +0900
Subject: [PATCH 1/2] [AArch64] Add tests for prevent (shl (srl x, c1), c2) ->
 (and (shift x, c3)) when load

---
 llvm/test/CodeGen/AArch64/arm64-fold-lshr.ll | 76 ++++++++++++++++++++
 1 file changed, 76 insertions(+)
 create mode 100644 llvm/test/CodeGen/AArch64/arm64-fold-lshr.ll

diff --git a/llvm/test/CodeGen/AArch64/arm64-fold-lshr.ll b/llvm/test/CodeGen/AArch64/arm64-fold-lshr.ll
new file mode 100644
index 0000000000000..04c2d4417b1fe
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/arm64-fold-lshr.ll
@@ -0,0 +1,76 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc < %s -mtriple=aarch64 | FileCheck %s
+;
+
+define i32 @load_shr64(i64 %a, i64 %b, ptr %table) {
+; CHECK-LABEL: load_shr64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldr w0, [x2]
+; CHECK-NEXT:    ret
+entry:
+  %mul = mul i64 %b, %a
+  %shr = lshr i64 %mul, 64
+  %arrayidx = getelementptr inbounds i32, ptr %table, i64 %shr
+  %0 = load i32, ptr %arrayidx, align 4
+  ret i32 %0
+}
+
+define i32 @load_shr63(i64 %a, i64 %b, ptr %table) {
+; CHECK-LABEL: load_shr63:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mul x8, x1, x0
+; CHECK-NEXT:    lsr x8, x8, #61
+; CHECK-NEXT:    and x8, x8, #0x4
+; CHECK-NEXT:    ldr w0, [x2, x8]
+; CHECK-NEXT:    ret
+entry:
+  %mul = mul i64 %b, %a
+  %shr = lshr i64 %mul, 63
+  %arrayidx = getelementptr inbounds i32, ptr %table, i64 %shr
+  %0 = load i32, ptr %arrayidx, align 4
+  ret i32 %0
+}
+
+define i32 @load_shr2(i64 %a, i64 %b, ptr %table) {
+; CHECK-LABEL: load_shr2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mul x8, x1, x0
+; CHECK-NEXT:    and x8, x8, #0xfffffffffffffffc
+; CHECK-NEXT:    ldr w0, [x2, x8]
+; CHECK-NEXT:    ret
+entry:
+  %mul = mul i64 %b, %a
+  %shr = lshr i64 %mul, 2
+  %arrayidx = getelementptr inbounds i32, ptr %table, i64 %shr
+  %0 = load i32, ptr %arrayidx, align 4
+  ret i32 %0
+}
+
+define i32 @load_shr1(i64 %a, i64 %b, ptr %table) {
+; CHECK-LABEL: load_shr1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mul x8, x1, x0
+; CHECK-NEXT:    lsl x8, x8, #1
+; CHECK-NEXT:    and x8, x8, #0xfffffffffffffffc
+; CHECK-NEXT:    ldr w0, [x2, x8]
+; CHECK-NEXT:    ret
+entry:
+  %mul = mul i64 %b, %a
+  %shr = lshr i64 %mul, 1
+  %arrayidx = getelementptr inbounds i32, ptr %table, i64 %shr
+  %0 = load i32, ptr %arrayidx, align 4
+  ret i32 %0
+}
+
+define i32 @load_shl1(i64 %a, i64 %b, ptr %table) {
+; CHECK-LABEL: load_shl1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    lsl x8, x0, #3
+; CHECK-NEXT:    ldr w0, [x2, x8]
+; CHECK-NEXT:    ret
+entry:
+  %shl = shl i64 %a, 1
+  %arrayidx = getelementptr inbounds i32, ptr %table, i64 %shl
+  %0 = load i32, ptr %arrayidx, align 4
+  ret i32 %0
+}

>From 378b15938f6d2f4f4319ac5b665db770231330fd Mon Sep 17 00:00:00 2001
From: hanbeom <kese111 at gmail.com>
Date: Tue, 23 Apr 2024 02:41:51 +0900
Subject: [PATCH 2/2] [AArch64] prevent (shl (srl x, c1), c2) -> (and (shift x,
 c3)) when load

Currently, the process of replacing bitwise operations consisting of
`(shl (srl x, c1), c2)` with `And` is performed by `DAGCombiner`.

However, in certain cases such as `(shl (srl x, c1), 2)`, the
transform to `AND` is not needed if the result is used as a `Load` target.

Consider following case:
```
        lsr x8, x8, #56
        and x8, x8, #0xfc
        ldr w0, [x2, x8]
        ret
```

In this case, we can remove the `AND` by changing the target of `LDR`
to `[X2, X8, LSL #2]` and changing the right-shift amount from 56 to 58.

after changed:
```
        lsr x8, x8, #58
        ldr w0, [x2, x8, lsl #2]
        ret
```

This patch checks whether the `(shl (srl x, c1), 2)` operation on a
`load` target can be prevented from being transformed to `And`.
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 48 ++++++++++++++++++-
 llvm/test/CodeGen/AArch64/arm64-fold-lshr.ll  | 14 +++---
 2 files changed, 53 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 2f46b23a97c62..2c2963b704780 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -563,6 +563,7 @@ namespace {
     SDValue visitFMULForFMADistributiveCombine(SDNode *N);
 
     SDValue XformToShuffleWithZero(SDNode *N);
+    bool isCanBeLoadedWithLsl(SDNode *N);
     bool reassociationCanBreakAddressingModePattern(unsigned Opc,
                                                     const SDLoc &DL,
                                                     SDNode *N,
@@ -9893,7 +9894,8 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
     // folding this will increase the total number of instructions.
     if (N0.getOpcode() == ISD::SRL &&
         (N0.getOperand(1) == N1 || N0.hasOneUse()) &&
-        TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
+        TLI.shouldFoldConstantShiftPairToMask(N, Level) &&
+        !isCanBeLoadedWithLsl(N)) {
       if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
                                     /*AllowUndefs*/ false,
                                     /*AllowTypeMismatch*/ true)) {
@@ -28338,6 +28340,50 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
   return false;
 }
 
+bool DAGCombiner::isCanBeLoadedWithLsl(SDNode *N) {
+  if (!N->hasOneUse())
+    return false;
+
+  APInt SrlAmt;
+  if (sd_match(N,
+               m_Shl(m_Srl(m_Value(), m_ConstInt(SrlAmt)), m_SpecificInt(2)))) {
+    // Srl knownbits
+    SDValue ShlV = SDValue(N, 0);
+    unsigned RegSize = ShlV.getValueType().getScalarSizeInBits();
+    KnownBits Known = DAG.computeKnownBits(ShlV);
+    if (Known.getBitWidth() != RegSize)
+      return false;
+
+    // check load (ldr x, (add x, (shl (srl x, c1) 2)))
+    SDNode *User = N->use_begin().getUse().getUser();
+    if (!User || User->getOpcode() != ISD::ADD)
+      return false;
+
+    SDNode *Load = User->use_begin().getUse().getUser();
+    if (!Load || Load->getOpcode() != ISD::LOAD)
+      return false;
+
+    auto LoadN = dyn_cast<LoadSDNode>(Load);
+    if (!LoadN)
+      return false;
+
+    TargetLoweringBase::AddrMode AM;
+    AM.HasBaseReg = true;
+    AM.BaseOffs = Known.getMaxValue().getZExtValue();
+    EVT VT = LoadN->getMemoryVT();
+    unsigned AS = LoadN->getAddressSpace();
+    Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
+    if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
+      return false;
+
+    if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT))
+      return false;
+    return true;
+  }
+
+  return false;
+}
+
 /// This is the entry point for the file.
 void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA,
                            CodeGenOptLevel OptLevel) {
diff --git a/llvm/test/CodeGen/AArch64/arm64-fold-lshr.ll b/llvm/test/CodeGen/AArch64/arm64-fold-lshr.ll
index 04c2d4417b1fe..1b625cb41bffb 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fold-lshr.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fold-lshr.ll
@@ -19,9 +19,8 @@ define i32 @load_shr63(i64 %a, i64 %b, ptr %table) {
 ; CHECK-LABEL: load_shr63:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    mul x8, x1, x0
-; CHECK-NEXT:    lsr x8, x8, #61
-; CHECK-NEXT:    and x8, x8, #0x4
-; CHECK-NEXT:    ldr w0, [x2, x8]
+; CHECK-NEXT:    lsr x8, x8, #63
+; CHECK-NEXT:    ldr w0, [x2, x8, lsl #2]
 ; CHECK-NEXT:    ret
 entry:
   %mul = mul i64 %b, %a
@@ -35,8 +34,8 @@ define i32 @load_shr2(i64 %a, i64 %b, ptr %table) {
 ; CHECK-LABEL: load_shr2:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    mul x8, x1, x0
-; CHECK-NEXT:    and x8, x8, #0xfffffffffffffffc
-; CHECK-NEXT:    ldr w0, [x2, x8]
+; CHECK-NEXT:    lsr x8, x8, #2
+; CHECK-NEXT:    ldr w0, [x2, x8, lsl #2]
 ; CHECK-NEXT:    ret
 entry:
   %mul = mul i64 %b, %a
@@ -50,9 +49,8 @@ define i32 @load_shr1(i64 %a, i64 %b, ptr %table) {
 ; CHECK-LABEL: load_shr1:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    mul x8, x1, x0
-; CHECK-NEXT:    lsl x8, x8, #1
-; CHECK-NEXT:    and x8, x8, #0xfffffffffffffffc
-; CHECK-NEXT:    ldr w0, [x2, x8]
+; CHECK-NEXT:    lsr x8, x8, #1
+; CHECK-NEXT:    ldr w0, [x2, x8, lsl #2]
 ; CHECK-NEXT:    ret
 entry:
   %mul = mul i64 %b, %a



More information about the llvm-commits mailing list