[llvm] [AArch64] Optimize when storing symmetry constants (PR #93717)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 17 13:13:32 PDT 2024
https://github.com/ParkHanbum updated https://github.com/llvm/llvm-project/pull/93717
From ed551e3f47c665ca52ebc0db16d8ade10a2c225d Mon Sep 17 00:00:00 2001
From: hanbeom <kese111 at gmail.com>
Date: Sat, 25 May 2024 16:54:40 +0900
Subject: [PATCH 1/4] [AArch64] Add PreTest for storing symmetry constant
---
.../CodeGen/AArch64/movimm-expand-ldst.ll | 135 ++++++++++++++++++
.../CodeGen/AArch64/movimm-expand-ldst.mir | 54 +++++++
2 files changed, 189 insertions(+)
diff --git a/llvm/test/CodeGen/AArch64/movimm-expand-ldst.ll b/llvm/test/CodeGen/AArch64/movimm-expand-ldst.ll
index b25ac96f97c7d..8737738ab9025 100644
--- a/llvm/test/CodeGen/AArch64/movimm-expand-ldst.ll
+++ b/llvm/test/CodeGen/AArch64/movimm-expand-ldst.ll
@@ -93,3 +93,138 @@ define i64 @testuu0xf555f555f555f555() {
; CHECK-NEXT: ret
ret i64 u0xf555f555f555f555
}
+
+define void @test_store_0x1234567812345678(ptr %x) {
+; CHECK-LABEL: test_store_0x1234567812345678:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x8, #22136 // =0x5678
+; CHECK-NEXT: movk x8, #4660, lsl #16
+; CHECK-NEXT: orr x8, x8, x8, lsl #32
+; CHECK-NEXT: str x8, [x0]
+; CHECK-NEXT: ret
+ store i64 u0x1234567812345678, ptr %x
+ ret void
+}
+
+define void @test_store_0xff3456ffff3456ff(ptr %x) {
+; CHECK-LABEL: test_store_0xff3456ffff3456ff:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x8, #22271 // =0x56ff
+; CHECK-NEXT: movk x8, #65332, lsl #16
+; CHECK-NEXT: orr x8, x8, x8, lsl #32
+; CHECK-NEXT: str x8, [x0]
+; CHECK-NEXT: ret
+ store i64 u0xff3456ffff3456ff, ptr %x
+ ret void
+}
+
+define void @test_store_0x00345600345600(ptr %x) {
+; CHECK-LABEL: test_store_0x00345600345600:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x8, #22016 // =0x5600
+; CHECK-NEXT: movk x8, #52, lsl #16
+; CHECK-NEXT: movk x8, #13398, lsl #32
+; CHECK-NEXT: str x8, [x0]
+; CHECK-NEXT: ret
+ store i64 u0x00345600345600, ptr %x
+ ret void
+}
+
+define void @test_store_0x5555555555555555(ptr %x) {
+; CHECK-LABEL: test_store_0x5555555555555555:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x8, #6148914691236517205 // =0x5555555555555555
+; CHECK-NEXT: str x8, [x0]
+; CHECK-NEXT: ret
+ store i64 u0x5555555555555555, ptr %x
+ ret void
+}
+
+define void @test_store_0x5055555550555555(ptr %x) {
+; CHECK-LABEL: test_store_0x5055555550555555:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x8, #6148914691236517205 // =0x5555555555555555
+; CHECK-NEXT: and x8, x8, #0xf0fffffff0ffffff
+; CHECK-NEXT: str x8, [x0]
+; CHECK-NEXT: ret
+ store i64 u0x5055555550555555, ptr %x
+ ret void
+}
+
+define void @test_store_0x0000555555555555(ptr %x) {
+; CHECK-LABEL: test_store_0x0000555555555555:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x8, #6148914691236517205 // =0x5555555555555555
+; CHECK-NEXT: movk x8, #0, lsl #48
+; CHECK-NEXT: str x8, [x0]
+; CHECK-NEXT: ret
+ store i64 u0x0000555555555555, ptr %x
+ ret void
+}
+
+define void @test_store_0x0000555500005555(ptr %x) {
+; CHECK-LABEL: test_store_0x0000555500005555:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x8, #21845 // =0x5555
+; CHECK-NEXT: movk x8, #21845, lsl #32
+; CHECK-NEXT: str x8, [x0]
+; CHECK-NEXT: ret
+ store i64 u0x0000555500005555, ptr %x
+ ret void
+}
+
+define void @test_store_0x5555000055550000(ptr %x) {
+; CHECK-LABEL: test_store_0x5555000055550000:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x8, #1431633920 // =0x55550000
+; CHECK-NEXT: movk x8, #21845, lsl #48
+; CHECK-NEXT: str x8, [x0]
+; CHECK-NEXT: ret
+ store i64 u0x5555000055550000, ptr %x
+ ret void
+}
+
+define void @test_store_u0xffff5555ffff5555(ptr %x) {
+; CHECK-LABEL: test_store_u0xffff5555ffff5555:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x8, #-43691 // =0xffffffffffff5555
+; CHECK-NEXT: movk x8, #21845, lsl #32
+; CHECK-NEXT: str x8, [x0]
+; CHECK-NEXT: ret
+ store i64 u0xffff5555ffff5555, ptr %x
+ ret void
+}
+
+define void @test_store_0x8888ffff8888ffff(ptr %x) {
+; CHECK-LABEL: test_store_0x8888ffff8888ffff:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x8, #-2004287489 // =0xffffffff8888ffff
+; CHECK-NEXT: movk x8, #34952, lsl #48
+; CHECK-NEXT: str x8, [x0]
+; CHECK-NEXT: ret
+ store i64 u0x8888ffff8888ffff, ptr %x
+ ret void
+}
+
+define void @test_store_uu0xfffff555f555f555(ptr %x) {
+; CHECK-LABEL: test_store_uu0xfffff555f555f555:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x8, #-2731 // =0xfffffffffffff555
+; CHECK-NEXT: movk x8, #62805, lsl #16
+; CHECK-NEXT: movk x8, #62805, lsl #32
+; CHECK-NEXT: str x8, [x0]
+; CHECK-NEXT: ret
+ store i64 u0xfffff555f555f555, ptr %x
+ ret void
+}
+
+define void @test_store_uu0xf555f555f555f555(ptr %x) {
+; CHECK-LABEL: test_store_uu0xf555f555f555f555:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x8, #6148914691236517205 // =0x5555555555555555
+; CHECK-NEXT: orr x8, x8, #0xe001e001e001e001
+; CHECK-NEXT: str x8, [x0]
+; CHECK-NEXT: ret
+ store i64 u0xf555f555f555f555, ptr %x
+ ret void
+}
diff --git a/llvm/test/CodeGen/AArch64/movimm-expand-ldst.mir b/llvm/test/CodeGen/AArch64/movimm-expand-ldst.mir
index 72529807d5d54..c6021b508cc08 100644
--- a/llvm/test/CodeGen/AArch64/movimm-expand-ldst.mir
+++ b/llvm/test/CodeGen/AArch64/movimm-expand-ldst.mir
@@ -32,3 +32,57 @@ body: |
; CHECK-NEXT: RET undef $lr, implicit $x0
renamable $x0 = MOVi64imm -4550323095879417536
RET_ReallyLR implicit $x0
+...
+---
+name: test_fold_repeating_constant_store
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0
+ ; CHECK-LABEL: name: test_fold_repeating_constant_store
+ ; CHECK: liveins: $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $x8 = MOVZXi 49370, 0
+ ; CHECK-NEXT: renamable $x8 = MOVKXi $x8, 320, 16
+ ; CHECK-NEXT: renamable $x8 = ORRXrs $x8, $x8, 32
+ ; CHECK-NEXT: STRXui killed renamable $x8, killed renamable $x0, 0
+ ; CHECK-NEXT: RET undef $lr
+ renamable $x8 = MOVi64imm 90284035103834330
+ STRXui killed renamable $x8, killed renamable $x0, 0
+ RET_ReallyLR
+...
+---
+name: test_fold_repeating_constant_store_neg
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0
+ ; CHECK-LABEL: name: test_fold_repeating_constant_store_neg
+ ; CHECK: liveins: $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $x8 = MOVZXi 320, 0
+ ; CHECK-NEXT: renamable $x8 = MOVKXi $x8, 49370, 16
+ ; CHECK-NEXT: renamable $x8 = ORRXrs $x8, $x8, 32
+ ; CHECK-NEXT: STRXui killed renamable $x8, killed renamable $x0, 0
+ ; CHECK-NEXT: RET undef $lr
+ renamable $x8 = MOVi64imm -4550323095879417536
+ STRXui killed renamable $x8, killed renamable $x0, 0
+ RET_ReallyLR
+...
+---
+name: test_fold_repeating_constant_store_16bit_unit
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0
+ ; CHECK-LABEL: name: test_fold_repeating_constant_store_16bit_unit
+ ; CHECK: liveins: $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $x8 = MOVZXi 21845, 16
+ ; CHECK-NEXT: renamable $x8 = MOVKXi $x8, 21845, 48
+ ; CHECK-NEXT: STRXui killed renamable $x8, killed renamable $x0, 0
+ ; CHECK-NEXT: RET undef $lr
+ renamable $x8 = MOVZXi 21845, 16
+ renamable $x8 = MOVKXi $x8, 21845, 48
+ STRXui killed renamable $x8, killed renamable $x0, 0
+ RET undef $lr
From 38ab6d8a61f83e5cb640c69fe4f5f740c2095d50 Mon Sep 17 00:00:00 2001
From: hanbeom <kese111 at gmail.com>
Date: Fri, 22 Mar 2024 14:31:19 +0900
Subject: [PATCH 2/4] [AArch64] Optimize when storing symmetry constants
This change looks for stores of constants that are symmetric in 32-bit
units, usually materialized by several 'MOV' instructions and at most
one 'ORR'.
If such a pattern is found, only the lower 32-bit half of the constant
is materialized, and the store is replaced with an 'STP' instruction
that writes that value to both halves.
For example:
renamable $x8 = MOVZXi 49370, 0
renamable $x8 = MOVKXi $x8, 320, 16
renamable $x8 = ORRXrs $x8, $x8, 32
STRXui killed renamable $x8, killed renamable $x0, 0
becomes
$w8 = MOVZWi 49370, 0
$w8 = MOVKWi $w8, 320, 16
STPWi killed renamable $w8, killed renamable $w8, killed renamable $x0, 0
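For reference, such a sequence typically comes from storing a 64-bit
constant whose two 32-bit halves are identical. A minimal IR sketch in
the style of the movimm-expand-ldst.ll tests (function name illustrative;
the constant is the value built by the MOVZXi/MOVKXi/ORRXrs sequence
above, i.e. 0x0140C0DA0140C0DA):
  define void @test_store_0x0140c0da0140c0da(ptr %x) {
    store i64 u0x0140c0da0140c0da, ptr %x
    ret void
  }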
---
.../AArch64/AArch64LoadStoreOptimizer.cpp | 189 ++++++++++++++++++
.../CodeGen/AArch64/movimm-expand-ldst.ll | 24 +--
.../CodeGen/AArch64/movimm-expand-ldst.mir | 19 +-
3 files changed, 207 insertions(+), 25 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index d0adb78b231a7..09d9fa11a4959 100644
--- a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -201,6 +201,14 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
// Find and merge a base register updates before or after a ld/st instruction.
bool tryToMergeLdStUpdate(MachineBasicBlock::iterator &MBBI);
+ // Finds and collapses loads of symmetric constant value.
+ bool tryFoldSymmetryConstantLoad(MachineBasicBlock::iterator &I,
+ unsigned Limit);
+ MachineBasicBlock::iterator
+ doFoldSymmetryConstantLoad(MachineInstr &MI,
+ SmallVectorImpl<MachineBasicBlock::iterator> &MIs,
+ int SuccIndex, bool hasORR, int Accumulated);
+
bool optimizeBlock(MachineBasicBlock &MBB, bool EnableNarrowZeroStOpt);
bool runOnMachineFunction(MachineFunction &Fn) override;
@@ -2252,6 +2260,167 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
return E;
}
+static bool isSymmetric(MachineInstr &MI, Register BaseReg) {
+ auto MatchBaseReg = [&](unsigned Count) {
+ for (unsigned I = 0; I < Count; I++) {
+ auto OpI = MI.getOperand(I);
+ if (OpI.isReg() && OpI.getReg() != BaseReg)
+ return false;
+ }
+ return true;
+ };
+
+ unsigned Opc = MI.getOpcode();
+ switch (Opc) {
+ default:
+ return false;
+ case AArch64::MOVZXi:
+ return MatchBaseReg(1);
+ case AArch64::MOVKXi:
+ return MatchBaseReg(2);
+ case AArch64::ORRXrs:
+ MachineOperand &Imm = MI.getOperand(3);
+ // The fourth operand of the ORR must be 32, which means a
+ // 32-bit symmetric constant load.
+ // e.g. renamable $x8 = ORRXrs $x8, $x8, 32
+ if (MatchBaseReg(3) && Imm.isImm() && Imm.getImm() == 32)
+ return true;
+ }
+
+ return false;
+}
+
+MachineBasicBlock::iterator AArch64LoadStoreOpt::doFoldSymmetryConstantLoad(
+ MachineInstr &MI, SmallVectorImpl<MachineBasicBlock::iterator> &MIs,
+ int SuccIndex, bool hasORR, int Accumulated) {
+ MachineBasicBlock::iterator I = MI.getIterator();
+ MachineBasicBlock::iterator E = I->getParent()->end();
+ MachineBasicBlock::iterator NextI = next_nodbg(I, E);
+ MachineBasicBlock::iterator FirstMovI;
+ MachineBasicBlock *MBB = MI.getParent();
+ uint64_t Mask = 0xFFFFUL;
+ Register DstRegW;
+
+ if (hasORR) {
+ (*MIs.begin())->eraseFromParent();
+ } else {
+ int Index = 0;
+ for (auto MI = MIs.begin(), E = MIs.end(); MI != E; ++MI, Index++) {
+ if (Index == SuccIndex - 1) {
+ FirstMovI = *MI;
+ break;
+ }
+ (*MI)->eraseFromParent();
+ }
+ DstRegW =
+ TRI->getSubReg(FirstMovI->getOperand(0).getReg(), AArch64::sub_32);
+
+ int Lower = Accumulated & Mask;
+ if (Lower) {
+ BuildMI(*MBB, FirstMovI, FirstMovI->getDebugLoc(),
+ TII->get(AArch64::MOVZWi), DstRegW)
+ .addImm(Lower)
+ .addImm(0);
+ Lower = (Accumulated >> 16) & Mask;
+ if (Lower) {
+ BuildMI(*MBB, FirstMovI, FirstMovI->getDebugLoc(),
+ TII->get(AArch64::MOVKWi), DstRegW)
+ .addUse(DstRegW)
+ .addImm(Lower)
+ .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 16));
+ }
+ } else {
+ Lower = Accumulated >> 16 & Mask;
+ BuildMI(*MBB, FirstMovI, FirstMovI->getDebugLoc(),
+ TII->get(AArch64::MOVZWi), DstRegW)
+ .addImm(Lower)
+ .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 16));
+ }
+ FirstMovI->eraseFromParent();
+ }
+
+ Register BaseReg = getLdStRegOp(MI).getReg();
+ const MachineOperand MO = AArch64InstrInfo::getLdStBaseOp(MI);
+ DstRegW = TRI->getSubReg(BaseReg, AArch64::sub_32);
+ unsigned DstRegState = getRegState(MI.getOperand(0));
+ BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(AArch64::STPWi))
+ .addReg(DstRegW, DstRegState)
+ .addReg(DstRegW, DstRegState)
+ .addReg(MO.getReg(), getRegState(MO))
+ .add(AArch64InstrInfo::getLdStOffsetOp(MI))
+ .setMemRefs(MI.memoperands())
+ .setMIFlags(MI.getFlags());
+ I->eraseFromParent();
+
+ return NextI;
+}
+
+bool AArch64LoadStoreOpt::tryFoldSymmetryConstantLoad(
+ MachineBasicBlock::iterator &I, unsigned Limit) {
+ MachineInstr &MI = *I;
+ if (MI.getOpcode() != AArch64::STRXui)
+ return false;
+
+ MachineBasicBlock::iterator MBBI = I;
+ MachineBasicBlock::iterator B = I->getParent()->begin();
+ if (MBBI == B)
+ return false;
+
+ Register BaseReg = getLdStRegOp(MI).getReg();
+ unsigned Count = 0, SuccIndex = 0;
+ bool hasORR = false;
+ SmallVector<MachineBasicBlock::iterator> MIs;
+ ModifiedRegUnits.clear();
+ UsedRegUnits.clear();
+
+ uint64_t Accumulated = 0, Mask = 0xFFFFUL;
+ do {
+ MBBI = prev_nodbg(MBBI, B);
+ MachineInstr &MI = *MBBI;
+ if (!MI.isTransient())
+ ++Count;
+ if (!isSymmetric(MI, BaseReg)) {
+ LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
+ TRI);
+ if (!ModifiedRegUnits.available(BaseReg) ||
+ !UsedRegUnits.available(BaseReg))
+ return false;
+ continue;
+ }
+
+ unsigned Opc = MI.getOpcode();
+ if (Opc == AArch64::ORRXrs) {
+ hasORR = true;
+ MIs.push_back(MBBI);
+ continue;
+ }
+ unsigned ValueOrder = Opc == AArch64::MOVZXi ? 1 : 2;
+ MachineOperand Value = MI.getOperand(ValueOrder);
+ MachineOperand Shift = MI.getOperand(ValueOrder + 1);
+ if (!Value.isImm() || !Shift.isImm())
+ return false;
+
+ uint64_t IValue = Value.getImm();
+ uint64_t IShift = Shift.getImm();
+ Accumulated -= (Accumulated & (Mask << IShift));
+ Accumulated += (IValue << IShift);
+ MIs.push_back(MBBI);
+ if (Accumulated != 0 &&
+ (((Accumulated >> 32) == (Accumulated & 0xffffffffULL)) ||
+ (hasORR && Accumulated >> 32 == 0))) {
+ SuccIndex = MIs.size();
+ break;
+ }
+ } while (MBBI != B && Count < Limit);
+
+ if (SuccIndex) {
+ I = doFoldSymmetryConstantLoad(MI, MIs, SuccIndex, hasORR, Accumulated);
+ return true;
+ }
+
+ return false;
+}
+
bool AArch64LoadStoreOpt::tryToPromoteLoadFromStore(
MachineBasicBlock::iterator &MBBI) {
MachineInstr &MI = *MBBI;
@@ -2518,6 +2687,26 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
++MBBI;
}
+ // We have an opportunity to optimize an `STRXui` whose source register was
+ // built by materializing the same 32-bit constant into both halves of a
+ // 64-bit register. With `STPWi` the constant only needs to be materialized
+ // once and is simply stored twice.
+ // Considering :
+ // renamable $x8 = MOVZXi 49370, 0
+ // renamable $x8 = MOVKXi $x8, 320, 16
+ // renamable $x8 = ORRXrs $x8, $x8, 32
+ // STRXui killed renamable $x8, killed renamable $x0, 0
+ // Transform :
+ // $w8 = MOVZWi 49370, 0
+ // $w8 = MOVKWi $w8, 320, 16
+ // STPWi killed renamable $w8, killed renamable $w8, killed renamable $x0, 0
+ for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ MBBI != E;) {
+ if (tryFoldSymmetryConstantLoad(MBBI, UpdateLimit))
+ Modified = true;
+ else
+ ++MBBI;
+ }
+
return Modified;
}
diff --git a/llvm/test/CodeGen/AArch64/movimm-expand-ldst.ll b/llvm/test/CodeGen/AArch64/movimm-expand-ldst.ll
index 8737738ab9025..4d687b37ade7a 100644
--- a/llvm/test/CodeGen/AArch64/movimm-expand-ldst.ll
+++ b/llvm/test/CodeGen/AArch64/movimm-expand-ldst.ll
@@ -97,10 +97,9 @@ define i64 @testuu0xf555f555f555f555() {
define void @test_store_0x1234567812345678(ptr %x) {
; CHECK-LABEL: test_store_0x1234567812345678:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov x8, #22136 // =0x5678
-; CHECK-NEXT: movk x8, #4660, lsl #16
-; CHECK-NEXT: orr x8, x8, x8, lsl #32
-; CHECK-NEXT: str x8, [x0]
+; CHECK-NEXT: mov w8, #22136 // =0x5678
+; CHECK-NEXT: movk w8, #4660, lsl #16
+; CHECK-NEXT: stp w8, w8, [x0]
; CHECK-NEXT: ret
store i64 u0x1234567812345678, ptr %x
ret void
@@ -109,10 +108,9 @@ define void @test_store_0x1234567812345678(ptr %x) {
define void @test_store_0xff3456ffff3456ff(ptr %x) {
; CHECK-LABEL: test_store_0xff3456ffff3456ff:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov x8, #22271 // =0x56ff
-; CHECK-NEXT: movk x8, #65332, lsl #16
-; CHECK-NEXT: orr x8, x8, x8, lsl #32
-; CHECK-NEXT: str x8, [x0]
+; CHECK-NEXT: mov w8, #22271 // =0x56ff
+; CHECK-NEXT: movk w8, #65332, lsl #16
+; CHECK-NEXT: stp w8, w8, [x0]
; CHECK-NEXT: ret
store i64 u0xff3456ffff3456ff, ptr %x
ret void
@@ -165,9 +163,8 @@ define void @test_store_0x0000555555555555(ptr %x) {
define void @test_store_0x0000555500005555(ptr %x) {
; CHECK-LABEL: test_store_0x0000555500005555:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov x8, #21845 // =0x5555
-; CHECK-NEXT: movk x8, #21845, lsl #32
-; CHECK-NEXT: str x8, [x0]
+; CHECK-NEXT: mov w8, #21845 // =0x5555
+; CHECK-NEXT: stp w8, w8, [x0]
; CHECK-NEXT: ret
store i64 u0x0000555500005555, ptr %x
ret void
@@ -176,9 +173,8 @@ define void @test_store_0x0000555500005555(ptr %x) {
define void @test_store_0x5555000055550000(ptr %x) {
; CHECK-LABEL: test_store_0x5555000055550000:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov x8, #1431633920 // =0x55550000
-; CHECK-NEXT: movk x8, #21845, lsl #48
-; CHECK-NEXT: str x8, [x0]
+; CHECK-NEXT: mov w8, #1431633920 // =0x55550000
+; CHECK-NEXT: stp w8, w8, [x0]
; CHECK-NEXT: ret
store i64 u0x5555000055550000, ptr %x
ret void
diff --git a/llvm/test/CodeGen/AArch64/movimm-expand-ldst.mir b/llvm/test/CodeGen/AArch64/movimm-expand-ldst.mir
index c6021b508cc08..28f43513ed084 100644
--- a/llvm/test/CodeGen/AArch64/movimm-expand-ldst.mir
+++ b/llvm/test/CodeGen/AArch64/movimm-expand-ldst.mir
@@ -42,10 +42,9 @@ body: |
; CHECK-LABEL: name: test_fold_repeating_constant_store
; CHECK: liveins: $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $x8 = MOVZXi 49370, 0
- ; CHECK-NEXT: renamable $x8 = MOVKXi $x8, 320, 16
- ; CHECK-NEXT: renamable $x8 = ORRXrs $x8, $x8, 32
- ; CHECK-NEXT: STRXui killed renamable $x8, killed renamable $x0, 0
+ ; CHECK-NEXT: $w8 = MOVZWi 49370, 0
+ ; CHECK-NEXT: $w8 = MOVKWi $w8, 320, 16
+ ; CHECK-NEXT: STPWi killed renamable $w8, killed renamable $w8, killed renamable $x0, 0
; CHECK-NEXT: RET undef $lr
renamable $x8 = MOVi64imm 90284035103834330
STRXui killed renamable $x8, killed renamable $x0, 0
@@ -60,10 +59,9 @@ body: |
; CHECK-LABEL: name: test_fold_repeating_constant_store_neg
; CHECK: liveins: $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $x8 = MOVZXi 320, 0
- ; CHECK-NEXT: renamable $x8 = MOVKXi $x8, 49370, 16
- ; CHECK-NEXT: renamable $x8 = ORRXrs $x8, $x8, 32
- ; CHECK-NEXT: STRXui killed renamable $x8, killed renamable $x0, 0
+ ; CHECK-NEXT: $w8 = MOVZWi 320, 0
+ ; CHECK-NEXT: $w8 = MOVKWi $w8, 49370, 16
+ ; CHECK-NEXT: STPWi killed renamable $w8, killed renamable $w8, killed renamable $x0, 0
; CHECK-NEXT: RET undef $lr
renamable $x8 = MOVi64imm -4550323095879417536
STRXui killed renamable $x8, killed renamable $x0, 0
@@ -78,9 +76,8 @@ body: |
; CHECK-LABEL: name: test_fold_repeating_constant_store_16bit_unit
; CHECK: liveins: $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $x8 = MOVZXi 21845, 16
- ; CHECK-NEXT: renamable $x8 = MOVKXi $x8, 21845, 48
- ; CHECK-NEXT: STRXui killed renamable $x8, killed renamable $x0, 0
+ ; CHECK-NEXT: $w8 = MOVZWi 21845, 16
+ ; CHECK-NEXT: STPWi killed renamable $w8, killed renamable $w8, killed renamable $x0, 0
; CHECK-NEXT: RET undef $lr
renamable $x8 = MOVZXi 21845, 16
renamable $x8 = MOVKXi $x8, 21845, 48
From f677ea596d3839e3b0fb442c3402c85eb58c9919 Mon Sep 17 00:00:00 2001
From: hanbeom <kese111 at gmail.com>
Date: Wed, 12 Jun 2024 04:32:38 +0900
Subject: [PATCH 3/4] remove only the unnecessary instructions instead of rebuilding the load sequence
---
.../AArch64/AArch64LoadStoreOptimizer.cpp | 74 ++++++-------------
.../CodeGen/AArch64/movimm-expand-ldst.ll | 12 +--
.../CodeGen/AArch64/movimm-expand-ldst.mir | 10 +--
3 files changed, 35 insertions(+), 61 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index 09d9fa11a4959..294d28b9b8b95 100644
--- a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -207,7 +207,7 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
MachineBasicBlock::iterator
doFoldSymmetryConstantLoad(MachineInstr &MI,
SmallVectorImpl<MachineBasicBlock::iterator> &MIs,
- int SuccIndex, bool hasORR, int Accumulated);
+ int UpperLoadIdx, int Accumulated);
bool optimizeBlock(MachineBasicBlock &MBB, bool EnableNarrowZeroStOpt);
@@ -2260,7 +2260,7 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
return E;
}
-static bool isSymmetric(MachineInstr &MI, Register BaseReg) {
+static bool isSymmetricLoadCandidate(MachineInstr &MI, Register BaseReg) {
auto MatchBaseReg = [&](unsigned Count) {
for (unsigned I = 0; I < Count; I++) {
auto OpI = MI.getOperand(I);
@@ -2292,56 +2292,28 @@ static bool isSymmetric(MachineInstr &MI, Register BaseReg) {
MachineBasicBlock::iterator AArch64LoadStoreOpt::doFoldSymmetryConstantLoad(
MachineInstr &MI, SmallVectorImpl<MachineBasicBlock::iterator> &MIs,
- int SuccIndex, bool hasORR, int Accumulated) {
+ int UpperLoadIdx, int Accumulated) {
MachineBasicBlock::iterator I = MI.getIterator();
MachineBasicBlock::iterator E = I->getParent()->end();
MachineBasicBlock::iterator NextI = next_nodbg(I, E);
- MachineBasicBlock::iterator FirstMovI;
MachineBasicBlock *MBB = MI.getParent();
- uint64_t Mask = 0xFFFFUL;
- Register DstRegW;
- if (hasORR) {
+ if (!UpperLoadIdx) {
+ // The ORR guarantees that the preceding instructions materialize only the
+ // lower 32-bit constant, so removing the ORR alone is sufficient.
(*MIs.begin())->eraseFromParent();
} else {
+ // Remove the MOVs that build the upper 32 bits; we know they are part of
+ // the symmetric constant.
int Index = 0;
- for (auto MI = MIs.begin(), E = MIs.end(); MI != E; ++MI, Index++) {
- if (Index == SuccIndex - 1) {
- FirstMovI = *MI;
- break;
- }
+ for (auto MI = MIs.begin(); Index < UpperLoadIdx; ++MI, Index++) {
(*MI)->eraseFromParent();
}
- DstRegW =
- TRI->getSubReg(FirstMovI->getOperand(0).getReg(), AArch64::sub_32);
-
- int Lower = Accumulated & Mask;
- if (Lower) {
- BuildMI(*MBB, FirstMovI, FirstMovI->getDebugLoc(),
- TII->get(AArch64::MOVZWi), DstRegW)
- .addImm(Lower)
- .addImm(0);
- Lower = (Accumulated >> 16) & Mask;
- if (Lower) {
- BuildMI(*MBB, FirstMovI, FirstMovI->getDebugLoc(),
- TII->get(AArch64::MOVKWi), DstRegW)
- .addUse(DstRegW)
- .addImm(Lower)
- .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 16));
- }
- } else {
- Lower = Accumulated >> 16 & Mask;
- BuildMI(*MBB, FirstMovI, FirstMovI->getDebugLoc(),
- TII->get(AArch64::MOVZWi), DstRegW)
- .addImm(Lower)
- .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 16));
- }
- FirstMovI->eraseFromParent();
}
Register BaseReg = getLdStRegOp(MI).getReg();
const MachineOperand MO = AArch64InstrInfo::getLdStBaseOp(MI);
- DstRegW = TRI->getSubReg(BaseReg, AArch64::sub_32);
+ Register DstRegW = TRI->getSubReg(BaseReg, AArch64::sub_32);
unsigned DstRegState = getRegState(MI.getOperand(0));
BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(AArch64::STPWi))
.addReg(DstRegW, DstRegState)
@@ -2351,7 +2323,6 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::doFoldSymmetryConstantLoad(
.setMemRefs(MI.memoperands())
.setMIFlags(MI.getFlags());
I->eraseFromParent();
-
return NextI;
}
@@ -2367,19 +2338,18 @@ bool AArch64LoadStoreOpt::tryFoldSymmetryConstantLoad(
return false;
Register BaseReg = getLdStRegOp(MI).getReg();
- unsigned Count = 0, SuccIndex = 0;
- bool hasORR = false;
+ unsigned Count = 0, UpperLoadIdx = 0;
+ uint64_t Accumulated = 0, Mask = 0xFFFFUL;
+ bool hasORR = false, Found = false;
SmallVector<MachineBasicBlock::iterator> MIs;
ModifiedRegUnits.clear();
UsedRegUnits.clear();
-
- uint64_t Accumulated = 0, Mask = 0xFFFFUL;
do {
MBBI = prev_nodbg(MBBI, B);
MachineInstr &MI = *MBBI;
if (!MI.isTransient())
++Count;
- if (!isSymmetric(MI, BaseReg)) {
+ if (!isSymmetricLoadCandidate(MI, BaseReg)) {
LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
TRI);
if (!ModifiedRegUnits.available(BaseReg) ||
@@ -2402,19 +2372,23 @@ bool AArch64LoadStoreOpt::tryFoldSymmetryConstantLoad(
uint64_t IValue = Value.getImm();
uint64_t IShift = Shift.getImm();
- Accumulated -= (Accumulated & (Mask << IShift));
- Accumulated += (IValue << IShift);
+ uint64_t Adder = IValue << IShift;
MIs.push_back(MBBI);
+ if (Adder >> 32)
+ UpperLoadIdx = MIs.size();
+
+ Accumulated -= Accumulated & (Mask << IShift);
+ Accumulated += Adder;
if (Accumulated != 0 &&
(((Accumulated >> 32) == (Accumulated & 0xffffffffULL)) ||
- (hasORR && Accumulated >> 32 == 0))) {
- SuccIndex = MIs.size();
+ (hasORR && (Accumulated >> 32 == 0)))) {
+ Found = true;
break;
}
} while (MBBI != B && Count < Limit);
- if (SuccIndex) {
- I = doFoldSymmetryConstantLoad(MI, MIs, SuccIndex, hasORR, Accumulated);
+ if (Found) {
+ I = doFoldSymmetryConstantLoad(MI, MIs, UpperLoadIdx, Accumulated);
return true;
}
diff --git a/llvm/test/CodeGen/AArch64/movimm-expand-ldst.ll b/llvm/test/CodeGen/AArch64/movimm-expand-ldst.ll
index 4d687b37ade7a..dda41344b4cfd 100644
--- a/llvm/test/CodeGen/AArch64/movimm-expand-ldst.ll
+++ b/llvm/test/CodeGen/AArch64/movimm-expand-ldst.ll
@@ -97,8 +97,8 @@ define i64 @testuu0xf555f555f555f555() {
define void @test_store_0x1234567812345678(ptr %x) {
; CHECK-LABEL: test_store_0x1234567812345678:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #22136 // =0x5678
-; CHECK-NEXT: movk w8, #4660, lsl #16
+; CHECK-NEXT: mov x8, #22136 // =0x5678
+; CHECK-NEXT: movk x8, #4660, lsl #16
; CHECK-NEXT: stp w8, w8, [x0]
; CHECK-NEXT: ret
store i64 u0x1234567812345678, ptr %x
@@ -108,8 +108,8 @@ define void @test_store_0x1234567812345678(ptr %x) {
define void @test_store_0xff3456ffff3456ff(ptr %x) {
; CHECK-LABEL: test_store_0xff3456ffff3456ff:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #22271 // =0x56ff
-; CHECK-NEXT: movk w8, #65332, lsl #16
+; CHECK-NEXT: mov x8, #22271 // =0x56ff
+; CHECK-NEXT: movk x8, #65332, lsl #16
; CHECK-NEXT: stp w8, w8, [x0]
; CHECK-NEXT: ret
store i64 u0xff3456ffff3456ff, ptr %x
@@ -163,7 +163,7 @@ define void @test_store_0x0000555555555555(ptr %x) {
define void @test_store_0x0000555500005555(ptr %x) {
; CHECK-LABEL: test_store_0x0000555500005555:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #21845 // =0x5555
+; CHECK-NEXT: mov x8, #21845 // =0x5555
; CHECK-NEXT: stp w8, w8, [x0]
; CHECK-NEXT: ret
store i64 u0x0000555500005555, ptr %x
@@ -173,7 +173,7 @@ define void @test_store_0x0000555500005555(ptr %x) {
define void @test_store_0x5555000055550000(ptr %x) {
; CHECK-LABEL: test_store_0x5555000055550000:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #1431633920 // =0x55550000
+; CHECK-NEXT: mov x8, #1431633920 // =0x55550000
; CHECK-NEXT: stp w8, w8, [x0]
; CHECK-NEXT: ret
store i64 u0x5555000055550000, ptr %x
diff --git a/llvm/test/CodeGen/AArch64/movimm-expand-ldst.mir b/llvm/test/CodeGen/AArch64/movimm-expand-ldst.mir
index 28f43513ed084..587b31f38dc8c 100644
--- a/llvm/test/CodeGen/AArch64/movimm-expand-ldst.mir
+++ b/llvm/test/CodeGen/AArch64/movimm-expand-ldst.mir
@@ -42,8 +42,8 @@ body: |
; CHECK-LABEL: name: test_fold_repeating_constant_store
; CHECK: liveins: $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: $w8 = MOVZWi 49370, 0
- ; CHECK-NEXT: $w8 = MOVKWi $w8, 320, 16
+ ; CHECK-NEXT: renamable $x8 = MOVZXi 49370, 0
+ ; CHECK-NEXT: renamable $x8 = MOVKXi $x8, 320, 16
; CHECK-NEXT: STPWi killed renamable $w8, killed renamable $w8, killed renamable $x0, 0
; CHECK-NEXT: RET undef $lr
renamable $x8 = MOVi64imm 90284035103834330
@@ -59,8 +59,8 @@ body: |
; CHECK-LABEL: name: test_fold_repeating_constant_store_neg
; CHECK: liveins: $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: $w8 = MOVZWi 320, 0
- ; CHECK-NEXT: $w8 = MOVKWi $w8, 49370, 16
+ ; CHECK-NEXT: renamable $x8 = MOVZXi 320, 0
+ ; CHECK-NEXT: renamable $x8 = MOVKXi $x8, 49370, 16
; CHECK-NEXT: STPWi killed renamable $w8, killed renamable $w8, killed renamable $x0, 0
; CHECK-NEXT: RET undef $lr
renamable $x8 = MOVi64imm -4550323095879417536
@@ -76,7 +76,7 @@ body: |
; CHECK-LABEL: name: test_fold_repeating_constant_store_16bit_unit
; CHECK: liveins: $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: $w8 = MOVZWi 21845, 16
+ ; CHECK-NEXT: renamable $x8 = MOVZXi 21845, 16
; CHECK-NEXT: STPWi killed renamable $w8, killed renamable $w8, killed renamable $x0, 0
; CHECK-NEXT: RET undef $lr
renamable $x8 = MOVZXi 21845, 16
From ffd7ee31f2f6b62ea65f4fc21a777be4b12cba9f Mon Sep 17 00:00:00 2001
From: hanbeom <kese111 at gmail.com>
Date: Tue, 18 Jun 2024 05:13:13 +0900
Subject: [PATCH 4/4] fix wrong Imm
---
llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp | 3 ++-
llvm/test/CodeGen/AArch64/movimm-expand-ldst.ll | 8 ++++----
llvm/test/CodeGen/AArch64/movimm-expand-ldst.mir | 6 +++---
3 files changed, 9 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index 294d28b9b8b95..69fe890784ee3 100644
--- a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -2315,11 +2315,12 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::doFoldSymmetryConstantLoad(
const MachineOperand MO = AArch64InstrInfo::getLdStBaseOp(MI);
Register DstRegW = TRI->getSubReg(BaseReg, AArch64::sub_32);
unsigned DstRegState = getRegState(MI.getOperand(0));
+ int OffsetStride = TII->getMemScale(MI);
BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(AArch64::STPWi))
.addReg(DstRegW, DstRegState)
.addReg(DstRegW, DstRegState)
.addReg(MO.getReg(), getRegState(MO))
- .add(AArch64InstrInfo::getLdStOffsetOp(MI))
+ .addImm(OffsetStride)
.setMemRefs(MI.memoperands())
.setMIFlags(MI.getFlags());
I->eraseFromParent();
diff --git a/llvm/test/CodeGen/AArch64/movimm-expand-ldst.ll b/llvm/test/CodeGen/AArch64/movimm-expand-ldst.ll
index dda41344b4cfd..379f4098855f6 100644
--- a/llvm/test/CodeGen/AArch64/movimm-expand-ldst.ll
+++ b/llvm/test/CodeGen/AArch64/movimm-expand-ldst.ll
@@ -99,7 +99,7 @@ define void @test_store_0x1234567812345678(ptr %x) {
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #22136 // =0x5678
; CHECK-NEXT: movk x8, #4660, lsl #16
-; CHECK-NEXT: stp w8, w8, [x0]
+; CHECK-NEXT: stp w8, w8, [x0, #32]
; CHECK-NEXT: ret
store i64 u0x1234567812345678, ptr %x
ret void
@@ -110,7 +110,7 @@ define void @test_store_0xff3456ffff3456ff(ptr %x) {
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #22271 // =0x56ff
; CHECK-NEXT: movk x8, #65332, lsl #16
-; CHECK-NEXT: stp w8, w8, [x0]
+; CHECK-NEXT: stp w8, w8, [x0, #32]
; CHECK-NEXT: ret
store i64 u0xff3456ffff3456ff, ptr %x
ret void
@@ -164,7 +164,7 @@ define void @test_store_0x0000555500005555(ptr %x) {
; CHECK-LABEL: test_store_0x0000555500005555:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #21845 // =0x5555
-; CHECK-NEXT: stp w8, w8, [x0]
+; CHECK-NEXT: stp w8, w8, [x0, #32]
; CHECK-NEXT: ret
store i64 u0x0000555500005555, ptr %x
ret void
@@ -174,7 +174,7 @@ define void @test_store_0x5555000055550000(ptr %x) {
; CHECK-LABEL: test_store_0x5555000055550000:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #1431633920 // =0x55550000
-; CHECK-NEXT: stp w8, w8, [x0]
+; CHECK-NEXT: stp w8, w8, [x0, #32]
; CHECK-NEXT: ret
store i64 u0x5555000055550000, ptr %x
ret void
diff --git a/llvm/test/CodeGen/AArch64/movimm-expand-ldst.mir b/llvm/test/CodeGen/AArch64/movimm-expand-ldst.mir
index 587b31f38dc8c..be7ef95f28673 100644
--- a/llvm/test/CodeGen/AArch64/movimm-expand-ldst.mir
+++ b/llvm/test/CodeGen/AArch64/movimm-expand-ldst.mir
@@ -44,7 +44,7 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $x8 = MOVZXi 49370, 0
; CHECK-NEXT: renamable $x8 = MOVKXi $x8, 320, 16
- ; CHECK-NEXT: STPWi killed renamable $w8, killed renamable $w8, killed renamable $x0, 0
+ ; CHECK-NEXT: STPWi killed renamable $w8, killed renamable $w8, killed renamable $x0, 8
; CHECK-NEXT: RET undef $lr
renamable $x8 = MOVi64imm 90284035103834330
STRXui killed renamable $x8, killed renamable $x0, 0
@@ -61,7 +61,7 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $x8 = MOVZXi 320, 0
; CHECK-NEXT: renamable $x8 = MOVKXi $x8, 49370, 16
- ; CHECK-NEXT: STPWi killed renamable $w8, killed renamable $w8, killed renamable $x0, 0
+ ; CHECK-NEXT: STPWi killed renamable $w8, killed renamable $w8, killed renamable $x0, 8
; CHECK-NEXT: RET undef $lr
renamable $x8 = MOVi64imm -4550323095879417536
STRXui killed renamable $x8, killed renamable $x0, 0
@@ -77,7 +77,7 @@ body: |
; CHECK: liveins: $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: renamable $x8 = MOVZXi 21845, 16
- ; CHECK-NEXT: STPWi killed renamable $w8, killed renamable $w8, killed renamable $x0, 0
+ ; CHECK-NEXT: STPWi killed renamable $w8, killed renamable $w8, killed renamable $x0, 8
; CHECK-NEXT: RET undef $lr
renamable $x8 = MOVZXi 21845, 16
renamable $x8 = MOVKXi $x8, 21845, 48