[llvm] [AArch64] Replace AND with LSL#2 for LDR target (#34101) (PR #89531)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 3 13:50:40 PDT 2024
https://github.com/ParkHanbum updated https://github.com/llvm/llvm-project/pull/89531
From 1b500fb3a39d8d040e2c7840e688e9d940cba3ca Mon Sep 17 00:00:00 2001
From: hanbeom <kese111 at gmail.com>
Date: Fri, 31 May 2024 18:55:16 +0900
Subject: [PATCH 1/2] [AArch64] Add tests for preventing (shl (srl x, c1), c2) ->
 (and (shift x, c3)) when used by a load
---
llvm/test/CodeGen/AArch64/arm64-fold-lshr.ll | 145 +++++++++++++++++++
1 file changed, 145 insertions(+)
create mode 100644 llvm/test/CodeGen/AArch64/arm64-fold-lshr.ll
diff --git a/llvm/test/CodeGen/AArch64/arm64-fold-lshr.ll b/llvm/test/CodeGen/AArch64/arm64-fold-lshr.ll
new file mode 100644
index 0000000000000..55871c4682b99
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/arm64-fold-lshr.ll
@@ -0,0 +1,145 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc < %s -mtriple=aarch64 | FileCheck %s
+;
+
+define i16 @load16_shr63(i64 %a, i64 %b, ptr %table) {
+; CHECK-LABEL: load16_shr63:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mul x8, x1, x0
+; CHECK-NEXT: lsr x8, x8, #62
+; CHECK-NEXT: and x8, x8, #0x2
+; CHECK-NEXT: ldrh w0, [x2, x8]
+; CHECK-NEXT: ret
+entry:
+ %mul = mul i64 %b, %a
+ %shr = lshr i64 %mul, 63
+ %arrayidx = getelementptr inbounds i16, ptr %table, i64 %shr
+ %0 = load i16, ptr %arrayidx, align 2
+ ret i16 %0
+}
+
+define i16 @load16_shr2(i64 %a, i64 %b, ptr %table) {
+; CHECK-LABEL: load16_shr2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mul x8, x1, x0
+; CHECK-NEXT: lsr x8, x8, #1
+; CHECK-NEXT: and x8, x8, #0x7ffffffffffffffe
+; CHECK-NEXT: ldrh w0, [x2, x8]
+; CHECK-NEXT: ret
+entry:
+ %mul = mul i64 %b, %a
+ %shr = lshr i64 %mul, 2
+ %arrayidx = getelementptr inbounds i16, ptr %table, i64 %shr
+ %0 = load i16, ptr %arrayidx, align 2
+ ret i16 %0
+}
+
+define i16 @load16_shr1(i64 %a, i64 %b, ptr %table) {
+; CHECK-LABEL: load16_shr1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mul x8, x1, x0
+; CHECK-NEXT: and x8, x8, #0xfffffffffffffffe
+; CHECK-NEXT: ldrh w0, [x2, x8]
+; CHECK-NEXT: ret
+entry:
+ %mul = mul i64 %b, %a
+ %shr = lshr i64 %mul, 1
+ %arrayidx = getelementptr inbounds i16, ptr %table, i64 %shr
+ %0 = load i16, ptr %arrayidx, align 2
+ ret i16 %0
+}
+
+define i32 @load32_shr63(i64 %a, i64 %b, ptr %table) {
+; CHECK-LABEL: load32_shr63:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mul x8, x1, x0
+; CHECK-NEXT: lsr x8, x8, #61
+; CHECK-NEXT: and x8, x8, #0x4
+; CHECK-NEXT: ldr w0, [x2, x8]
+; CHECK-NEXT: ret
+entry:
+ %mul = mul i64 %b, %a
+ %shr = lshr i64 %mul, 63
+ %arrayidx = getelementptr inbounds i32, ptr %table, i64 %shr
+ %0 = load i32, ptr %arrayidx, align 4
+ ret i32 %0
+}
+
+define i32 @load32_shr2(i64 %a, i64 %b, ptr %table) {
+; CHECK-LABEL: load32_shr2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mul x8, x1, x0
+; CHECK-NEXT: and x8, x8, #0xfffffffffffffffc
+; CHECK-NEXT: ldr w0, [x2, x8]
+; CHECK-NEXT: ret
+entry:
+ %mul = mul i64 %b, %a
+ %shr = lshr i64 %mul, 2
+ %arrayidx = getelementptr inbounds i32, ptr %table, i64 %shr
+ %0 = load i32, ptr %arrayidx, align 4
+ ret i32 %0
+}
+
+define i32 @load32_shr1(i64 %a, i64 %b, ptr %table) {
+; CHECK-LABEL: load32_shr1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mul x8, x1, x0
+; CHECK-NEXT: lsl x8, x8, #1
+; CHECK-NEXT: and x8, x8, #0xfffffffffffffffc
+; CHECK-NEXT: ldr w0, [x2, x8]
+; CHECK-NEXT: ret
+entry:
+ %mul = mul i64 %b, %a
+ %shr = lshr i64 %mul, 1
+ %arrayidx = getelementptr inbounds i32, ptr %table, i64 %shr
+ %0 = load i32, ptr %arrayidx, align 4
+ ret i32 %0
+}
+
+define i64 @load64_shr63(i64 %a, i64 %b, ptr %table) {
+; CHECK-LABEL: load64_shr63:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mul x8, x1, x0
+; CHECK-NEXT: lsr x8, x8, #60
+; CHECK-NEXT: and x8, x8, #0x8
+; CHECK-NEXT: ldr x0, [x2, x8]
+; CHECK-NEXT: ret
+entry:
+ %mul = mul i64 %b, %a
+ %shr = lshr i64 %mul, 63
+ %arrayidx = getelementptr inbounds i64, ptr %table, i64 %shr
+ %0 = load i64, ptr %arrayidx, align 8
+ ret i64 %0
+}
+
+define i64 @load64_shr2(i64 %a, i64 %b, ptr %table) {
+; CHECK-LABEL: load64_shr2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mul x8, x1, x0
+; CHECK-NEXT: lsl x8, x8, #1
+; CHECK-NEXT: and x8, x8, #0xfffffffffffffff8
+; CHECK-NEXT: ldr x0, [x2, x8]
+; CHECK-NEXT: ret
+entry:
+ %mul = mul i64 %b, %a
+ %shr = lshr i64 %mul, 2
+ %arrayidx = getelementptr inbounds i64, ptr %table, i64 %shr
+ %0 = load i64, ptr %arrayidx, align 8
+ ret i64 %0
+}
+
+define i64 @load64_shr1(i64 %a, i64 %b, ptr %table) {
+; CHECK-LABEL: load64_shr1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mul x8, x1, x0
+; CHECK-NEXT: lsl x8, x8, #2
+; CHECK-NEXT: and x8, x8, #0xfffffffffffffff8
+; CHECK-NEXT: ldr x0, [x2, x8]
+; CHECK-NEXT: ret
+entry:
+ %mul = mul i64 %b, %a
+ %shr = lshr i64 %mul, 1
+ %arrayidx = getelementptr inbounds i64, ptr %table, i64 %shr
+ %0 = load i64, ptr %arrayidx, align 8
+ ret i64 %0
+}
From a9fa7ec1dac44360f98458673299d1982a9cb7f2 Mon Sep 17 00:00:00 2001
From: hanbeom <kese111 at gmail.com>
Date: Tue, 23 Apr 2024 02:41:51 +0900
Subject: [PATCH 2/2] [AArch64] Prevent (shl (srl x, c1), c2) -> (and (shift x,
 c3)) when used by a load
Currently, the `DAGCombiner` replaces shift pairs of the form
`(shl (srl x, c1), c2)` with an `AND` of a shifted value.
However, in certain cases such as `(shl (srl x, c1), 2)`, the
transform to `AND` is unnecessary when the result is used as the
address of a load.
Consider the following case:
```
lsr x8, x8, #56
and x8, x8, #0xfc
ldr w0, [x2, x8]
ret
```
In this case, we can remove the `AND` by changing the addressing mode
of the `LDR` to `[X2, X8, LSL #2]` and increasing the right-shift
amount from 56 to 58.
After the change:
```
lsr x8, x8, #58
ldr w0, [x2, x8, lsl #2]
ret
```
This patch prevents the `(shl (srl x, c1), c2)` operation from being
transformed to `AND` when its result feeds the address of a load.
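For reference, here is a minimal C++ sketch of source code that gives rise to
this pattern; the function name, parameter names, and shift amount are
illustrative assumptions, not taken from the patch or its tests:
```cpp
#include <cstdint>

// Hypothetical example: indexing a table of 4-byte elements with a shifted
// value. The index (a * b) >> 62 must be scaled by sizeof(int) == 4 when
// forming the load address; with this patch that scaling stays as LSL #2 in
// the LDR addressing mode instead of being folded into an AND mask.
int load32_shr62(uint64_t a, uint64_t b, const int *table) {
  uint64_t idx = (a * b) >> 62; // lsr x8, x8, #62
  return table[idx];            // ldr w0, [x2, x8, lsl #2]
}
```
The `ShlAmt <= 3` bound in the new check corresponds to the largest scale the
LDR register-offset addressing mode accepts for scalar loads (LSL #3 for
8-byte accesses).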
---
.../Target/AArch64/AArch64ISelLowering.cpp | 17 ++++++++
llvm/test/CodeGen/AArch64/arm64-fold-lshr.ll | 43 ++++++++-----------
2 files changed, 35 insertions(+), 25 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index f552f91929201..5c486d598b81f 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -16918,6 +16918,23 @@ bool AArch64TargetLowering::shouldFoldConstantShiftPairToMask(
return (!C1 || !C2 || C1->getZExtValue() >= C2->getZExtValue());
}
+ // We do not need to fold when this shift is used as the address of a load:
+ // (ldr x, (add x, (shl (srl x, c1) 2)))
+ if (N->getOpcode() == ISD::SHL && N->hasOneUse()) {
+ if (auto C2 = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
+ unsigned ShlAmt = C2->getZExtValue();
+ auto ShouldADD = *N->use_begin();
+ if (ShlAmt <= 3 && ShouldADD && ShouldADD->getOpcode() == ISD::ADD) {
+ if (auto ShouldLOAD = dyn_cast<LoadSDNode>(*ShouldADD->use_begin())) {
+ unsigned ByteVT = ShouldLOAD->getMemoryVT().getSizeInBits() / 8;
+ if ((1 << ShlAmt) == ByteVT &&
+ isIndexedLoadLegal(ISD::PRE_INC, ShouldLOAD->getMemoryVT()))
+ return false;
+ }
+ }
+ }
+ }
+
return true;
}
diff --git a/llvm/test/CodeGen/AArch64/arm64-fold-lshr.ll b/llvm/test/CodeGen/AArch64/arm64-fold-lshr.ll
index 55871c4682b99..9dfc8df703ce6 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fold-lshr.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fold-lshr.ll
@@ -6,9 +6,8 @@ define i16 @load16_shr63(i64 %a, i64 %b, ptr %table) {
; CHECK-LABEL: load16_shr63:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mul x8, x1, x0
-; CHECK-NEXT: lsr x8, x8, #62
-; CHECK-NEXT: and x8, x8, #0x2
-; CHECK-NEXT: ldrh w0, [x2, x8]
+; CHECK-NEXT: lsr x8, x8, #63
+; CHECK-NEXT: ldrh w0, [x2, x8, lsl #1]
; CHECK-NEXT: ret
entry:
%mul = mul i64 %b, %a
@@ -22,9 +21,8 @@ define i16 @load16_shr2(i64 %a, i64 %b, ptr %table) {
; CHECK-LABEL: load16_shr2:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mul x8, x1, x0
-; CHECK-NEXT: lsr x8, x8, #1
-; CHECK-NEXT: and x8, x8, #0x7ffffffffffffffe
-; CHECK-NEXT: ldrh w0, [x2, x8]
+; CHECK-NEXT: lsr x8, x8, #2
+; CHECK-NEXT: ldrh w0, [x2, x8, lsl #1]
; CHECK-NEXT: ret
entry:
%mul = mul i64 %b, %a
@@ -38,8 +36,8 @@ define i16 @load16_shr1(i64 %a, i64 %b, ptr %table) {
; CHECK-LABEL: load16_shr1:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mul x8, x1, x0
-; CHECK-NEXT: and x8, x8, #0xfffffffffffffffe
-; CHECK-NEXT: ldrh w0, [x2, x8]
+; CHECK-NEXT: lsr x8, x8, #1
+; CHECK-NEXT: ldrh w0, [x2, x8, lsl #1]
; CHECK-NEXT: ret
entry:
%mul = mul i64 %b, %a
@@ -53,9 +51,8 @@ define i32 @load32_shr63(i64 %a, i64 %b, ptr %table) {
; CHECK-LABEL: load32_shr63:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mul x8, x1, x0
-; CHECK-NEXT: lsr x8, x8, #61
-; CHECK-NEXT: and x8, x8, #0x4
-; CHECK-NEXT: ldr w0, [x2, x8]
+; CHECK-NEXT: lsr x8, x8, #63
+; CHECK-NEXT: ldr w0, [x2, x8, lsl #2]
; CHECK-NEXT: ret
entry:
%mul = mul i64 %b, %a
@@ -69,8 +66,8 @@ define i32 @load32_shr2(i64 %a, i64 %b, ptr %table) {
; CHECK-LABEL: load32_shr2:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mul x8, x1, x0
-; CHECK-NEXT: and x8, x8, #0xfffffffffffffffc
-; CHECK-NEXT: ldr w0, [x2, x8]
+; CHECK-NEXT: lsr x8, x8, #2
+; CHECK-NEXT: ldr w0, [x2, x8, lsl #2]
; CHECK-NEXT: ret
entry:
%mul = mul i64 %b, %a
@@ -84,9 +81,8 @@ define i32 @load32_shr1(i64 %a, i64 %b, ptr %table) {
; CHECK-LABEL: load32_shr1:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mul x8, x1, x0
-; CHECK-NEXT: lsl x8, x8, #1
-; CHECK-NEXT: and x8, x8, #0xfffffffffffffffc
-; CHECK-NEXT: ldr w0, [x2, x8]
+; CHECK-NEXT: lsr x8, x8, #1
+; CHECK-NEXT: ldr w0, [x2, x8, lsl #2]
; CHECK-NEXT: ret
entry:
%mul = mul i64 %b, %a
@@ -100,9 +96,8 @@ define i64 @load64_shr63(i64 %a, i64 %b, ptr %table) {
; CHECK-LABEL: load64_shr63:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mul x8, x1, x0
-; CHECK-NEXT: lsr x8, x8, #60
-; CHECK-NEXT: and x8, x8, #0x8
-; CHECK-NEXT: ldr x0, [x2, x8]
+; CHECK-NEXT: lsr x8, x8, #63
+; CHECK-NEXT: ldr x0, [x2, x8, lsl #3]
; CHECK-NEXT: ret
entry:
%mul = mul i64 %b, %a
@@ -116,9 +111,8 @@ define i64 @load64_shr2(i64 %a, i64 %b, ptr %table) {
; CHECK-LABEL: load64_shr2:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mul x8, x1, x0
-; CHECK-NEXT: lsl x8, x8, #1
-; CHECK-NEXT: and x8, x8, #0xfffffffffffffff8
-; CHECK-NEXT: ldr x0, [x2, x8]
+; CHECK-NEXT: lsr x8, x8, #2
+; CHECK-NEXT: ldr x0, [x2, x8, lsl #3]
; CHECK-NEXT: ret
entry:
%mul = mul i64 %b, %a
@@ -132,9 +126,8 @@ define i64 @load64_shr1(i64 %a, i64 %b, ptr %table) {
; CHECK-LABEL: load64_shr1:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mul x8, x1, x0
-; CHECK-NEXT: lsl x8, x8, #2
-; CHECK-NEXT: and x8, x8, #0xfffffffffffffff8
-; CHECK-NEXT: ldr x0, [x2, x8]
+; CHECK-NEXT: lsr x8, x8, #1
+; CHECK-NEXT: ldr x0, [x2, x8, lsl #3]
; CHECK-NEXT: ret
entry:
%mul = mul i64 %b, %a