[llvm] a3cf864 - [AArch64] Cleanup existing values in getMemOpInfo (#98196)
via llvm-commits
llvm-commits at lists.llvm.org
Sat Aug 3 04:31:13 PDT 2024
Author: David Green
Date: 2024-08-03T12:31:10+01:00
New Revision: a3cf8642bf4d3cd4362063cc2a98ead4dca8cdec
URL: https://github.com/llvm/llvm-project/commit/a3cf8642bf4d3cd4362063cc2a98ead4dca8cdec
DIFF: https://github.com/llvm/llvm-project/commit/a3cf8642bf4d3cd4362063cc2a98ead4dca8cdec.diff
LOG: [AArch64] Cleanup existing values in getMemOpInfo (#98196)
This patch cleans up some of the existing values in getMemOpInfo. All
values should now be in bytes (not bits), and MinOffset/MaxOffset are now
always represented unscaled, i.e. as the immediate that will be present in
the final instruction.
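As a minimal sketch of the new contract (not part of the patch; the helper
name and the Opc/ByteOffset variables are hypothetical), a caller is now
expected to scale the returned immediate range itself, much like the
AArch64FrameLowering change below does:

  // Sketch only: check whether a byte offset is encodable for a given opcode,
  // assuming this lives somewhere with access to AArch64InstrInfo.
  static bool fitsMemOpRange(unsigned Opc, int64_t ByteOffset) {
    TypeSize Scale = TypeSize::getFixed(1), Width = TypeSize::getFixed(0);
    int64_t MinOffset = 0, MaxOffset = 0;
    if (!AArch64InstrInfo::getMemOpInfo(Opc, Scale, Width, MinOffset, MaxOffset))
      return false;
    // MinOffset/MaxOffset are immediates; multiply by Scale to get bytes.
    return ByteOffset >= MinOffset * (int64_t)Scale.getFixedValue() &&
           ByteOffset <= MaxOffset * (int64_t)Scale.getFixedValue();
  }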
Although I could not find a place where it altered codegen, the offset of
a post-index instruction is now reported as 0, not scale*imm. An
isPostIndexLdStOpcode helper has been added to make sure that case is
handled properly.
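As a rough illustration of the post-index case (TII, TRI and MI are assumed
to be in scope, e.g. inside a scheduler or alias-analysis hook), a store such
as STRWpost is now reported as an access at base+0, with the write-back
amount not contributing to the memory offset:

  const MachineOperand *BaseOp = nullptr;
  int64_t Offset = 0;
  bool OffsetIsScalable = false;
  TypeSize Width = TypeSize::getFixed(0);
  // For e.g. early-clobber $x0 = STRWpost $w1, $x0, 24 the store itself is
  // to [x0 + 0]; the +24 only updates the base register afterwards.
  if (TII->getMemOperandWithOffsetWidth(MI, BaseOp, Offset, OffsetIsScalable,
                                        Width, TRI)) {
    // Offset == 0 and Width is 4 bytes here, matching the new MIR test below.
  }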
Added:
llvm/test/CodeGen/AArch64/sched-postidxalias.mir
Modified:
llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
llvm/lib/Target/AArch64/AArch64InstrInfo.h
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index bd530903bb664..f28511c74dcd6 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -1487,7 +1487,8 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
// If the first store isn't right where we want SP then we can't fold the
// update in so create a normal arithmetic instruction instead.
if (MBBI->getOperand(MBBI->getNumOperands() - 1).getImm() != 0 ||
- CSStackSizeInc < MinOffset || CSStackSizeInc > MaxOffset) {
+ CSStackSizeInc < MinOffset * (int64_t)Scale.getFixedValue() ||
+ CSStackSizeInc > MaxOffset * (int64_t)Scale.getFixedValue()) {
// If we are destroying the frame, make sure we add the increment after the
// last frame operation.
if (FrameFlag == MachineInstr::FrameDestroy)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 6902b783d7d65..e425984a67784 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -3487,6 +3487,229 @@ MachineInstr *AArch64InstrInfo::emitLdStWithAddr(MachineInstr &MemI,
"Function must not be called with an addressing mode it can't handle");
}
+/// Return true if the opcode is a post-index ld/st instruction, which really
+/// loads from base+0.
+static bool isPostIndexLdStOpcode(unsigned Opcode) {
+ switch (Opcode) {
+ default:
+ return false;
+ case AArch64::LD1Fourv16b_POST:
+ case AArch64::LD1Fourv1d_POST:
+ case AArch64::LD1Fourv2d_POST:
+ case AArch64::LD1Fourv2s_POST:
+ case AArch64::LD1Fourv4h_POST:
+ case AArch64::LD1Fourv4s_POST:
+ case AArch64::LD1Fourv8b_POST:
+ case AArch64::LD1Fourv8h_POST:
+ case AArch64::LD1Onev16b_POST:
+ case AArch64::LD1Onev1d_POST:
+ case AArch64::LD1Onev2d_POST:
+ case AArch64::LD1Onev2s_POST:
+ case AArch64::LD1Onev4h_POST:
+ case AArch64::LD1Onev4s_POST:
+ case AArch64::LD1Onev8b_POST:
+ case AArch64::LD1Onev8h_POST:
+ case AArch64::LD1Rv16b_POST:
+ case AArch64::LD1Rv1d_POST:
+ case AArch64::LD1Rv2d_POST:
+ case AArch64::LD1Rv2s_POST:
+ case AArch64::LD1Rv4h_POST:
+ case AArch64::LD1Rv4s_POST:
+ case AArch64::LD1Rv8b_POST:
+ case AArch64::LD1Rv8h_POST:
+ case AArch64::LD1Threev16b_POST:
+ case AArch64::LD1Threev1d_POST:
+ case AArch64::LD1Threev2d_POST:
+ case AArch64::LD1Threev2s_POST:
+ case AArch64::LD1Threev4h_POST:
+ case AArch64::LD1Threev4s_POST:
+ case AArch64::LD1Threev8b_POST:
+ case AArch64::LD1Threev8h_POST:
+ case AArch64::LD1Twov16b_POST:
+ case AArch64::LD1Twov1d_POST:
+ case AArch64::LD1Twov2d_POST:
+ case AArch64::LD1Twov2s_POST:
+ case AArch64::LD1Twov4h_POST:
+ case AArch64::LD1Twov4s_POST:
+ case AArch64::LD1Twov8b_POST:
+ case AArch64::LD1Twov8h_POST:
+ case AArch64::LD1i16_POST:
+ case AArch64::LD1i32_POST:
+ case AArch64::LD1i64_POST:
+ case AArch64::LD1i8_POST:
+ case AArch64::LD2Rv16b_POST:
+ case AArch64::LD2Rv1d_POST:
+ case AArch64::LD2Rv2d_POST:
+ case AArch64::LD2Rv2s_POST:
+ case AArch64::LD2Rv4h_POST:
+ case AArch64::LD2Rv4s_POST:
+ case AArch64::LD2Rv8b_POST:
+ case AArch64::LD2Rv8h_POST:
+ case AArch64::LD2Twov16b_POST:
+ case AArch64::LD2Twov2d_POST:
+ case AArch64::LD2Twov2s_POST:
+ case AArch64::LD2Twov4h_POST:
+ case AArch64::LD2Twov4s_POST:
+ case AArch64::LD2Twov8b_POST:
+ case AArch64::LD2Twov8h_POST:
+ case AArch64::LD2i16_POST:
+ case AArch64::LD2i32_POST:
+ case AArch64::LD2i64_POST:
+ case AArch64::LD2i8_POST:
+ case AArch64::LD3Rv16b_POST:
+ case AArch64::LD3Rv1d_POST:
+ case AArch64::LD3Rv2d_POST:
+ case AArch64::LD3Rv2s_POST:
+ case AArch64::LD3Rv4h_POST:
+ case AArch64::LD3Rv4s_POST:
+ case AArch64::LD3Rv8b_POST:
+ case AArch64::LD3Rv8h_POST:
+ case AArch64::LD3Threev16b_POST:
+ case AArch64::LD3Threev2d_POST:
+ case AArch64::LD3Threev2s_POST:
+ case AArch64::LD3Threev4h_POST:
+ case AArch64::LD3Threev4s_POST:
+ case AArch64::LD3Threev8b_POST:
+ case AArch64::LD3Threev8h_POST:
+ case AArch64::LD3i16_POST:
+ case AArch64::LD3i32_POST:
+ case AArch64::LD3i64_POST:
+ case AArch64::LD3i8_POST:
+ case AArch64::LD4Fourv16b_POST:
+ case AArch64::LD4Fourv2d_POST:
+ case AArch64::LD4Fourv2s_POST:
+ case AArch64::LD4Fourv4h_POST:
+ case AArch64::LD4Fourv4s_POST:
+ case AArch64::LD4Fourv8b_POST:
+ case AArch64::LD4Fourv8h_POST:
+ case AArch64::LD4Rv16b_POST:
+ case AArch64::LD4Rv1d_POST:
+ case AArch64::LD4Rv2d_POST:
+ case AArch64::LD4Rv2s_POST:
+ case AArch64::LD4Rv4h_POST:
+ case AArch64::LD4Rv4s_POST:
+ case AArch64::LD4Rv8b_POST:
+ case AArch64::LD4Rv8h_POST:
+ case AArch64::LD4i16_POST:
+ case AArch64::LD4i32_POST:
+ case AArch64::LD4i64_POST:
+ case AArch64::LD4i8_POST:
+ case AArch64::LDAPRWpost:
+ case AArch64::LDAPRXpost:
+ case AArch64::LDIAPPWpost:
+ case AArch64::LDIAPPXpost:
+ case AArch64::LDPDpost:
+ case AArch64::LDPQpost:
+ case AArch64::LDPSWpost:
+ case AArch64::LDPSpost:
+ case AArch64::LDPWpost:
+ case AArch64::LDPXpost:
+ case AArch64::LDRBBpost:
+ case AArch64::LDRBpost:
+ case AArch64::LDRDpost:
+ case AArch64::LDRHHpost:
+ case AArch64::LDRHpost:
+ case AArch64::LDRQpost:
+ case AArch64::LDRSBWpost:
+ case AArch64::LDRSBXpost:
+ case AArch64::LDRSHWpost:
+ case AArch64::LDRSHXpost:
+ case AArch64::LDRSWpost:
+ case AArch64::LDRSpost:
+ case AArch64::LDRWpost:
+ case AArch64::LDRXpost:
+ case AArch64::ST1Fourv16b_POST:
+ case AArch64::ST1Fourv1d_POST:
+ case AArch64::ST1Fourv2d_POST:
+ case AArch64::ST1Fourv2s_POST:
+ case AArch64::ST1Fourv4h_POST:
+ case AArch64::ST1Fourv4s_POST:
+ case AArch64::ST1Fourv8b_POST:
+ case AArch64::ST1Fourv8h_POST:
+ case AArch64::ST1Onev16b_POST:
+ case AArch64::ST1Onev1d_POST:
+ case AArch64::ST1Onev2d_POST:
+ case AArch64::ST1Onev2s_POST:
+ case AArch64::ST1Onev4h_POST:
+ case AArch64::ST1Onev4s_POST:
+ case AArch64::ST1Onev8b_POST:
+ case AArch64::ST1Onev8h_POST:
+ case AArch64::ST1Threev16b_POST:
+ case AArch64::ST1Threev1d_POST:
+ case AArch64::ST1Threev2d_POST:
+ case AArch64::ST1Threev2s_POST:
+ case AArch64::ST1Threev4h_POST:
+ case AArch64::ST1Threev4s_POST:
+ case AArch64::ST1Threev8b_POST:
+ case AArch64::ST1Threev8h_POST:
+ case AArch64::ST1Twov16b_POST:
+ case AArch64::ST1Twov1d_POST:
+ case AArch64::ST1Twov2d_POST:
+ case AArch64::ST1Twov2s_POST:
+ case AArch64::ST1Twov4h_POST:
+ case AArch64::ST1Twov4s_POST:
+ case AArch64::ST1Twov8b_POST:
+ case AArch64::ST1Twov8h_POST:
+ case AArch64::ST1i16_POST:
+ case AArch64::ST1i32_POST:
+ case AArch64::ST1i64_POST:
+ case AArch64::ST1i8_POST:
+ case AArch64::ST2GPostIndex:
+ case AArch64::ST2Twov16b_POST:
+ case AArch64::ST2Twov2d_POST:
+ case AArch64::ST2Twov2s_POST:
+ case AArch64::ST2Twov4h_POST:
+ case AArch64::ST2Twov4s_POST:
+ case AArch64::ST2Twov8b_POST:
+ case AArch64::ST2Twov8h_POST:
+ case AArch64::ST2i16_POST:
+ case AArch64::ST2i32_POST:
+ case AArch64::ST2i64_POST:
+ case AArch64::ST2i8_POST:
+ case AArch64::ST3Threev16b_POST:
+ case AArch64::ST3Threev2d_POST:
+ case AArch64::ST3Threev2s_POST:
+ case AArch64::ST3Threev4h_POST:
+ case AArch64::ST3Threev4s_POST:
+ case AArch64::ST3Threev8b_POST:
+ case AArch64::ST3Threev8h_POST:
+ case AArch64::ST3i16_POST:
+ case AArch64::ST3i32_POST:
+ case AArch64::ST3i64_POST:
+ case AArch64::ST3i8_POST:
+ case AArch64::ST4Fourv16b_POST:
+ case AArch64::ST4Fourv2d_POST:
+ case AArch64::ST4Fourv2s_POST:
+ case AArch64::ST4Fourv4h_POST:
+ case AArch64::ST4Fourv4s_POST:
+ case AArch64::ST4Fourv8b_POST:
+ case AArch64::ST4Fourv8h_POST:
+ case AArch64::ST4i16_POST:
+ case AArch64::ST4i32_POST:
+ case AArch64::ST4i64_POST:
+ case AArch64::ST4i8_POST:
+ case AArch64::STGPostIndex:
+ case AArch64::STGPpost:
+ case AArch64::STPDpost:
+ case AArch64::STPQpost:
+ case AArch64::STPSpost:
+ case AArch64::STPWpost:
+ case AArch64::STPXpost:
+ case AArch64::STRBBpost:
+ case AArch64::STRBpost:
+ case AArch64::STRDpost:
+ case AArch64::STRHHpost:
+ case AArch64::STRHpost:
+ case AArch64::STRQpost:
+ case AArch64::STRSpost:
+ case AArch64::STRWpost:
+ case AArch64::STRXpost:
+ case AArch64::STZ2GPostIndex:
+ case AArch64::STZGPostIndex:
+ return true;
+ }
+}
+
bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
bool &OffsetIsScalable, TypeSize &Width,
@@ -3518,8 +3741,11 @@ bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
// Compute the offset. Offset is calculated as the immediate operand
// multiplied by the scaling factor. Unscaled instructions have scaling factor
- // set to 1.
- if (LdSt.getNumExplicitOperands() == 3) {
+ // set to 1. Post-index instructions are a special case with an offset of 0.
+ if (isPostIndexLdStOpcode(LdSt.getOpcode())) {
+ BaseOp = &LdSt.getOperand(2);
+ Offset = 0;
+ } else if (LdSt.getNumExplicitOperands() == 3) {
BaseOp = &LdSt.getOperand(1);
Offset = LdSt.getOperand(2).getImm() * Scale.getKnownMinValue();
} else {
@@ -3529,10 +3755,7 @@ bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
}
OffsetIsScalable = Scale.isScalable();
- if (!BaseOp->isReg() && !BaseOp->isFI())
- return false;
-
- return true;
+ return BaseOp->isReg() || BaseOp->isFI();
}
MachineOperand &
@@ -3622,8 +3845,8 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
break;
case AArch64::STRWpost:
case AArch64::LDRWpost:
- Scale = TypeSize::getFixed(4);
- Width = TypeSize::getFixed(32);
+ Scale = TypeSize::getFixed(1);
+ Width = TypeSize::getFixed(4);
MinOffset = -256;
MaxOffset = 255;
break;
@@ -3690,13 +3913,15 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
MinOffset = -256;
MaxOffset = 255;
break;
- // LDP / STP
+ // LDP / STP (including pre/post inc)
case AArch64::LDPQi:
case AArch64::LDNPQi:
case AArch64::STPQi:
case AArch64::STNPQi:
+ case AArch64::STPQpre:
+ case AArch64::LDPQpost:
Scale = TypeSize::getFixed(16);
- Width = TypeSize::getFixed(32);
+ Width = TypeSize::getFixed(16 * 2);
MinOffset = -64;
MaxOffset = 63;
break;
@@ -3708,8 +3933,12 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::STPDi:
case AArch64::STNPXi:
case AArch64::STNPDi:
+ case AArch64::STPXpre:
+ case AArch64::LDPXpost:
+ case AArch64::STPDpre:
+ case AArch64::LDPDpost:
Scale = TypeSize::getFixed(8);
- Width = TypeSize::getFixed(16);
+ Width = TypeSize::getFixed(8 * 2);
MinOffset = -64;
MaxOffset = 63;
break;
@@ -3722,27 +3951,10 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::STNPWi:
case AArch64::STNPSi:
Scale = TypeSize::getFixed(4);
- Width = TypeSize::getFixed(8);
+ Width = TypeSize::getFixed(4 * 2);
MinOffset = -64;
MaxOffset = 63;
break;
- // pre/post inc
- case AArch64::STPQpre:
- case AArch64::LDPQpost:
- Scale = TypeSize::getFixed(16);
- Width = TypeSize::getFixed(16);
- MinOffset = -1024;
- MaxOffset = 1008;
- break;
- case AArch64::STPXpre:
- case AArch64::LDPXpost:
- case AArch64::STPDpre:
- case AArch64::LDPDpost:
- Scale = TypeSize::getFixed(8);
- Width = TypeSize::getFixed(8);
- MinOffset = -512;
- MaxOffset = 504;
- break;
case AArch64::StoreSwiftAsyncContext:
// Store is an STRXui, but there might be an ADDXri in the expansion too.
Scale = TypeSize::getFixed(1);
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
index 7e5ac423f09ce..24a500aa190ab 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -317,7 +317,10 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo {
/// Returns true if opcode \p Opc is a memory operation. If it is, set
/// \p Scale, \p Width, \p MinOffset, and \p MaxOffset accordingly.
///
- /// For unscaled instructions, \p Scale is set to 1.
+ /// For unscaled instructions, \p Scale is set to 1. All values are in bytes.
+ /// MinOffset/MaxOffset are the un-scaled limits of the immediate in the
+ /// instruction; the actual offset limits are [MinOffset*Scale,
+ /// MaxOffset*Scale].
static bool getMemOpInfo(unsigned Opcode, TypeSize &Scale, TypeSize &Width,
int64_t &MinOffset, int64_t &MaxOffset);
diff --git a/llvm/test/CodeGen/AArch64/sched-postidxalias.mir b/llvm/test/CodeGen/AArch64/sched-postidxalias.mir
new file mode 100644
index 0000000000000..98ee0fa21b2dd
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sched-postidxalias.mir
@@ -0,0 +1,63 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=aarch64 -mcpu=cortex-a55 -run-pass=machine-scheduler -debug-only=machine-scheduler -o - %s 2>&1 | FileCheck %s
+# REQUIRES: asserts
+
+# Both the accesses should have an offset of 0
+# CHECK: Num BaseOps: 1, Offset: 0, OffsetIsScalable: 0, Width: LocationSize::precise(4)
+# CHECK: Num BaseOps: 1, Offset: 0, OffsetIsScalable: 0, Width: LocationSize::precise(4)
+
+--- |
+ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
+ target triple = "aarch64"
+
+ define ptr @post(ptr %p, i32 %d1, i32 %d2) {
+ entry:
+ %d3 = mul i32 %d1, %d2
+ %q = getelementptr i64, ptr %p, i64 3
+ %r = getelementptr i64, ptr %p, i64 3
+ store i32 %d3, ptr %p, align 8
+ %0 = load i32, ptr %r, align 8
+ store i32 %d1, ptr %p, align 8
+ %add.ptr = getelementptr inbounds i8, ptr %p, i64 24
+ ret ptr %add.ptr
+ }
+
+...
+---
+name: post
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gpr64common, preferred-register: '' }
+ - { id: 1, class: gpr32, preferred-register: '' }
+ - { id: 2, class: gpr32, preferred-register: '' }
+ - { id: 3, class: gpr32, preferred-register: '' }
+ - { id: 4, class: gpr64common, preferred-register: '' }
+liveins:
+ - { reg: '$x0', virtual-reg: '%0' }
+ - { reg: '$w1', virtual-reg: '%1' }
+ - { reg: '$w2', virtual-reg: '%2' }
+body: |
+ bb.0.entry:
+ liveins: $x0, $w1, $w2
+
+ ; CHECK-LABEL: name: post
+ ; CHECK: liveins: $x0, $w1, $w2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w2
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
+ ; CHECK-NEXT: [[MADDWrrr:%[0-9]+]]:gpr32 = MADDWrrr [[COPY1]], [[COPY]], $wzr
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64common = COPY $x0
+ ; CHECK-NEXT: STRWui [[MADDWrrr]], [[COPY2]], 0 :: (store (s32) into %ir.p, align 8)
+ ; CHECK-NEXT: early-clobber [[COPY2]]:gpr64common = STRWpost [[COPY1]], [[COPY2]], 24 :: (store (s32) into %ir.p, align 8)
+ ; CHECK-NEXT: $x0 = COPY [[COPY2]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
+ %2:gpr32 = COPY $w2
+ %1:gpr32 = COPY $w1
+ %4:gpr64common = COPY $x0
+ %3:gpr32 = MADDWrrr %1, %2, $wzr
+ STRWui %3, %4, 0 :: (store (s32) into %ir.p, align 8)
+ early-clobber %4:gpr64common = STRWpost %1, %4, 24 :: (store (s32) into %ir.p, align 8)
+ $x0 = COPY %4
+ RET_ReallyLR implicit $x0
+
+...