[llvm] [AArch64] Limit immediate offsets when folding instructions into addressing modes (PR #67345)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 25 09:30:26 PDT 2023
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
<details>
<summary>Changes</summary>
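When folding an `ADD`/`SUB` immediate into a load/store addressing mode, don't move the resulting immediate offset outside the range that `LDP` can encode if the original offset was within that range.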
---
Full diff: https://github.com/llvm/llvm-project/pull/67345.diff
3 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64InstrInfo.cpp (+38-9)
- (modified) llvm/test/CodeGen/AArch64/GlobalISel/split-offsets-for-stp.ll (+2-2)
- (modified) llvm/test/CodeGen/AArch64/pow.ll (+5-5)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index c6a59ec44ef80e9..3cd7965d8d6fb9c 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -2866,14 +2866,43 @@ bool AArch64InstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI,
}
// Handle memory instructions with a [Reg, #Imm] addressing mode.
- auto canFoldAddSubImmIntoAddrMode = [&](int64_t Offset) -> bool {
- Offset += MemI.getOperand(2).getImm() * OffsetScale;
- if (!isLegalAddressingMode(NumBytes, Offset, /* Scale */ 0))
+
+ // Check we are not breaking a potential conversion to an LDP.
+ auto validateOffsetForLDP = [](unsigned NumBytes, int64_t OldOffset,
+ int64_t NewOffset) -> bool {
+ int64_t MinOffset, MaxOffset;
+ switch (NumBytes) {
+ default:
+ return true;
+ case 4:
+ MinOffset = -256;
+ MaxOffset = 252;
+ break;
+ case 8:
+ MinOffset = -512;
+ MaxOffset = 504;
+ break;
+ case 16:
+ MinOffset = -1024;
+ MaxOffset = 1008;
+ break;
+ }
+ return OldOffset < MinOffset || OldOffset > MaxOffset ||
+ (NewOffset >= MinOffset && NewOffset <= MaxOffset);
+ };
+ auto canFoldAddSubImmIntoAddrMode = [&](int64_t Disp) -> bool {
+ int64_t OldOffset = MemI.getOperand(2).getImm() * OffsetScale;
+ int64_t NewOffset = OldOffset + Disp;
+ if (!isLegalAddressingMode(NumBytes, NewOffset, /* Scale */ 0))
+ return false;
+ // If the old offset would fit into an LDP, but the new offset wouldn't,
+ // bail out.
+ if (!validateOffsetForLDP(NumBytes, OldOffset, NewOffset))
return false;
AM.BaseReg = AddrI.getOperand(1).getReg();
AM.ScaledReg = 0;
AM.Scale = 0;
- AM.Displacement = Offset;
+ AM.Displacement = NewOffset;
AM.Form = ExtAddrMode::Formula::Basic;
return true;
};
@@ -2899,7 +2928,7 @@ bool AArch64InstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI,
Subtarget.isSTRQroSlow();
};
- int64_t Offset = 0;
+ int64_t Disp = 0;
const bool OptSize = MemI.getMF()->getFunction().hasOptSize();
switch (AddrI.getOpcode()) {
default:
@@ -2910,16 +2939,16 @@ bool AArch64InstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI,
// ldr Xd, [Xa, #M]
// ->
// ldr Xd, [Xn, #N'+M]
- Offset = AddrI.getOperand(2).getImm() << AddrI.getOperand(3).getImm();
- return canFoldAddSubImmIntoAddrMode(Offset);
+ Disp = AddrI.getOperand(2).getImm() << AddrI.getOperand(3).getImm();
+ return canFoldAddSubImmIntoAddrMode(Disp);
case AArch64::SUBXri:
// sub Xa, Xn, #N
// ldr Xd, [Xa, #M]
// ->
// ldr Xd, [Xn, #N'+M]
- Offset = AddrI.getOperand(2).getImm() << AddrI.getOperand(3).getImm();
- return canFoldAddSubImmIntoAddrMode(-Offset);
+ Disp = AddrI.getOperand(2).getImm() << AddrI.getOperand(3).getImm();
+ return canFoldAddSubImmIntoAddrMode(-Disp);
case AArch64::ADDXrs: {
// add Xa, Xn, Xm, lsl #N
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/split-offsets-for-stp.ll b/llvm/test/CodeGen/AArch64/GlobalISel/split-offsets-for-stp.ll
index 6aaefff1f724062..2c2d72ce6afd079 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/split-offsets-for-stp.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/split-offsets-for-stp.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
-; RUN: llc -mtriple=aarch64-apple-ios -verify-machineinstrs -global-isel -aarch64-postlegalizer-consecutive-memops=0 < %s | FileCheck %s --check-prefix=CHECK-NO-SPLIT
-; RUN: llc -mtriple=aarch64-apple-ios -verify-machineinstrs -global-isel < %s | FileCheck %s --check-prefix=CHECK-SPLIT
+; RUN: llc -mtriple=aarch64-apple-ios -verify-machineinstrs -global-isel -aarch64-enable-sink-fold=true -aarch64-postlegalizer-consecutive-memops=0 < %s | FileCheck %s --check-prefix=CHECK-NO-SPLIT
+; RUN: llc -mtriple=aarch64-apple-ios -verify-machineinstrs -global-isel -aarch64-enable-sink-fold=true < %s | FileCheck %s --check-prefix=CHECK-SPLIT
define void @basic_split(ptr %p) {
; CHECK-NO-SPLIT-LABEL: basic_split:
diff --git a/llvm/test/CodeGen/AArch64/pow.ll b/llvm/test/CodeGen/AArch64/pow.ll
index ba9a5398298af7a..5141b21c7976a90 100644
--- a/llvm/test/CodeGen/AArch64/pow.ll
+++ b/llvm/test/CodeGen/AArch64/pow.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64-- -aarch64-enable-sink-fold=true | FileCheck %s
declare float @llvm.pow.f32(float, float)
declare <4 x float> @llvm.pow.v4f32(<4 x float>, <4 x float>)
@@ -110,17 +110,17 @@ define <2 x double> @pow_v2f64_one_fourth_not_enough_fmf(<2 x double> %x) nounwi
; CHECK-LABEL: pow_v2f64_one_fourth_not_enough_fmf:
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #48
-; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT: mov d0, v0.d[1]
; CHECK-NEXT: fmov d1, #0.25000000
; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
; CHECK-NEXT: bl pow
; CHECK-NEXT: fmov d1, #0.25000000
-; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: bl pow
-; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
; CHECK-NEXT: mov v0.d[1], v1.d[0]
``````````
</details>
https://github.com/llvm/llvm-project/pull/67345
More information about the llvm-commits mailing list