[llvm] [AArch64] Only clear kill flags if necessary when merging str (PR #69680)
Zhaoxuan Jiang via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 1 22:01:49 PDT 2023
https://github.com/nocchijiang updated https://github.com/llvm/llvm-project/pull/69680
>From 40ba1d2789c2e6c4fbbef48f12199f39e06641b1 Mon Sep 17 00:00:00 2001
From: Zhaoxuan Jiang <jiangzhaoxuan94 at gmail.com>
Date: Fri, 20 Oct 2023 14:22:32 +0800
Subject: [PATCH 1/3] [AArch64] Only clear kill flags if necessary when merging
str
Previously, the kill flags of the source register were unconditionally
cleared when a str pair was merged, which resulted in suboptimal register
allocation and inhibited some renaming opportunities that could allow
further merging of str instructions.
---
.../Target/AArch64/AArch64LoadStoreOptimizer.cpp | 13 +++++++++++--
llvm/test/CodeGen/AArch64/irg-nomem.mir | 2 +-
llvm/test/CodeGen/AArch64/ldrpre-ldr-merge.mir | 2 +-
llvm/test/CodeGen/AArch64/ldst-opt-aa.mir | 2 +-
.../CodeGen/AArch64/stp-opt-with-renaming-debug.mir | 2 +-
llvm/test/CodeGen/AArch64/sve-fixed-length-fp128.ll | 4 ++--
.../aarch64_generated_funcs.ll.generated.expected | 4 ++--
7 files changed, 19 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index c93fd02a821dcf9..0dc787c524daa96 100644
--- a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -997,8 +997,17 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
// STRWui %w0, ...
// USE %w1
// STRWui kill %w1 ; need to clear kill flag when moving STRWui upwards
- RegOp0.setIsKill(false);
- RegOp1.setIsKill(false);
+ for (auto It = std::next(I);
+ It != Paired && (RegOp0.isKill() || RegOp1.isKill()); ++It) {
+ auto ClearKill = [](MachineInstr &MI, MachineOperand &MOP,
+ const TargetRegisterInfo *TRI) {
+ Register Reg = MOP.getReg();
+ if (MI.readsRegister(Reg, TRI) || MI.modifiesRegister(Reg, TRI))
+ MOP.setIsKill(false);
+ };
+ ClearKill(*It, RegOp0, TRI);
+ ClearKill(*It, RegOp1, TRI);
+ }
} else {
// Clear kill flags of the first stores register. Example:
// STRWui %w1, ...
diff --git a/llvm/test/CodeGen/AArch64/irg-nomem.mir b/llvm/test/CodeGen/AArch64/irg-nomem.mir
index d428f16011a700e..bc247b0dbf9e307 100644
--- a/llvm/test/CodeGen/AArch64/irg-nomem.mir
+++ b/llvm/test/CodeGen/AArch64/irg-nomem.mir
@@ -62,7 +62,7 @@ body: |
$w9 = MOVZWi 1, 0, implicit-def $x9
; Check that stores are merged across IRG.
- ; CHECK: STPXi renamable $x9, renamable $x9, renamable $x0, 0
+ ; CHECK: STPXi renamable $x9, killed renamable $x9, renamable $x0, 0
STRXui renamable $x9, renamable $x0, 0 :: (store (s64) into %ir.x)
dead renamable $x10 = IRG renamable $x8, $xzr
diff --git a/llvm/test/CodeGen/AArch64/ldrpre-ldr-merge.mir b/llvm/test/CodeGen/AArch64/ldrpre-ldr-merge.mir
index 647130ba7d908fc..b76570da781f5a6 100644
--- a/llvm/test/CodeGen/AArch64/ldrpre-ldr-merge.mir
+++ b/llvm/test/CodeGen/AArch64/ldrpre-ldr-merge.mir
@@ -17,7 +17,7 @@ body: |
; CHECK: liveins: $w0, $w1, $x1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: early-clobber $x1, renamable $w0, renamable $w2 = LDPWpre renamable $x1, 5 :: (load (s32))
- ; CHECK-NEXT: STPWi renamable $w0, renamable $w2, renamable $x1, 0 :: (store (s32))
+ ; CHECK-NEXT: STPWi renamable $w0, killed renamable $w2, renamable $x1, 0 :: (store (s32))
; CHECK-NEXT: RET undef $lr
early-clobber renamable $x1, renamable $w0 = LDRWpre killed renamable $x1, 20 :: (load (s32))
renamable $w2 = LDRWui renamable $x1, 1 :: (load (s32))
diff --git a/llvm/test/CodeGen/AArch64/ldst-opt-aa.mir b/llvm/test/CodeGen/AArch64/ldst-opt-aa.mir
index 9f2680aca970e9d..16369af9ccc6dd3 100644
--- a/llvm/test/CodeGen/AArch64/ldst-opt-aa.mir
+++ b/llvm/test/CodeGen/AArch64/ldst-opt-aa.mir
@@ -15,7 +15,7 @@
---
# CHECK-LABEL: name: ldr_str_aa
# CHECK: $w8, $w9 = LDPWi $x1, 0
-# CHECK: STPWi $w8, $w9, $x0, 0
+# CHECK: STPWi killed $w8, killed $w9, $x0, 0
name: ldr_str_aa
tracksRegLiveness: true
body: |
diff --git a/llvm/test/CodeGen/AArch64/stp-opt-with-renaming-debug.mir b/llvm/test/CodeGen/AArch64/stp-opt-with-renaming-debug.mir
index 2c24383e6baa349..41d84a0d1cb079c 100644
--- a/llvm/test/CodeGen/AArch64/stp-opt-with-renaming-debug.mir
+++ b/llvm/test/CodeGen/AArch64/stp-opt-with-renaming-debug.mir
@@ -61,7 +61,7 @@ body: |
# CHECK-NEXT: DBG_VALUE $x0, $noreg,
# CHECK-NEXT: STRXui killed renamable $x8, renamable $x19, 2 :: (store (s64))
# CHECK-NEXT: $x8 = ADDXrs renamable $x0, killed renamable $x20, 0
-# CHECK-NEXT: STPXi $xzr, renamable $x8, renamable $x19, 0 :: (store (s64))
+# CHECK-NEXT: STPXi $xzr, killed renamable $x8, renamable $x19, 0 :: (store (s64))
# CHECK-NEXT: RET undef $lr, implicit $x0
name: test_dbg_value2
alignment: 4
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp128.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp128.ll
index 7ac1b58c626d191..da7e772461e28bc 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp128.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp128.ll
@@ -38,10 +38,10 @@ define void @fcvt_v4f64_v4f128(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-NEXT: fmov d0, d1
; CHECK-NEXT: bl __extenddftf2
; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-NEXT: ldr q2, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: stp q1, q0, [x19]
+; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT: stp q0, q2, [x19, #32]
+; CHECK-NEXT: stp q0, q1, [x19, #32]
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: add sp, sp, #48
; CHECK-NEXT: ldp x30, x19, [sp, #16] // 16-byte Folded Reload
diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/aarch64_generated_funcs.ll.generated.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/aarch64_generated_funcs.ll.generated.expected
index 110fa4d9b1cf1f4..97b17d98d347209 100644
--- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/aarch64_generated_funcs.ll.generated.expected
+++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/aarch64_generated_funcs.ll.generated.expected
@@ -133,8 +133,8 @@ attributes #0 = { noredzone nounwind ssp uwtable "frame-pointer"="all" }
; CHECK-LABEL: OUTLINED_FUNCTION_0:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w9, #2 // =0x2
-; CHECK-NEXT: mov w10, #3 // =0x3
; CHECK-NEXT: stp w9, w8, [x29, #-12]
+; CHECK-NEXT: mov w9, #3 // =0x3
; CHECK-NEXT: mov w8, #4 // =0x4
-; CHECK-NEXT: stp w8, w10, [sp, #12]
+; CHECK-NEXT: stp w8, w9, [sp, #12]
; CHECK-NEXT: ret
>From f46207fadd4b45d22dde9ac4db83f3ac209b0663 Mon Sep 17 00:00:00 2001
From: Zhaoxuan Jiang <jiangzhaoxuan94 at gmail.com>
Date: Tue, 24 Oct 2023 16:34:51 +0800
Subject: [PATCH 2/3] remove redundant modifiesRegister check
---
llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index 0dc787c524daa96..cb6f55d3b6355bc 100644
--- a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -1002,7 +1002,7 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
auto ClearKill = [](MachineInstr &MI, MachineOperand &MOP,
const TargetRegisterInfo *TRI) {
Register Reg = MOP.getReg();
- if (MI.readsRegister(Reg, TRI) || MI.modifiesRegister(Reg, TRI))
+ if (MI.readsRegister(Reg, TRI))
MOP.setIsKill(false);
};
ClearKill(*It, RegOp0, TRI);
>From f5948505dde3080f4ca205780c0e1802dfd8c1e6 Mon Sep 17 00:00:00 2001
From: Zhaoxuan Jiang <jiangzhaoxuan94 at gmail.com>
Date: Thu, 2 Nov 2023 13:01:13 +0800
Subject: [PATCH 3/3] only clear kill flag for the MI being moved
---
.../AArch64/AArch64LoadStoreOptimizer.cpp | 23 +++++++++----------
1 file changed, 11 insertions(+), 12 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index cb6f55d3b6355bc..72c8ef0ea4ee71a 100644
--- a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -990,24 +990,23 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
MachineBasicBlock *MBB = I->getParent();
MachineOperand RegOp0 = getLdStRegOp(*RtMI);
MachineOperand RegOp1 = getLdStRegOp(*Rt2MI);
+ MachineOperand &PairedRegOp = RtMI == &*Paired ? RegOp0 : RegOp1;
// Kill flags may become invalid when moving stores for pairing.
if (RegOp0.isUse()) {
if (!MergeForward) {
// Clear kill flags on store if moving upwards. Example:
- // STRWui %w0, ...
+ // STRWui kill %w0, ...
// USE %w1
// STRWui kill %w1 ; need to clear kill flag when moving STRWui upwards
- for (auto It = std::next(I);
- It != Paired && (RegOp0.isKill() || RegOp1.isKill()); ++It) {
- auto ClearKill = [](MachineInstr &MI, MachineOperand &MOP,
- const TargetRegisterInfo *TRI) {
- Register Reg = MOP.getReg();
- if (MI.readsRegister(Reg, TRI))
- MOP.setIsKill(false);
- };
- ClearKill(*It, RegOp0, TRI);
- ClearKill(*It, RegOp1, TRI);
- }
+ // We are about to move the store of w1, so its kill flag may become
+ // invalid; not the case for w0.
+ // Since w1 is used between the stores, the kill flag on w1 is cleared
+ // after merging.
+ // STPWi kill %w0, %w1, ...
+ // USE %w1
+ for (auto It = std::next(I); It != Paired && PairedRegOp.isKill(); ++It)
+ if (It->readsRegister(PairedRegOp.getReg(), TRI))
+ PairedRegOp.setIsKill(false);
} else {
// Clear kill flags of the first stores register. Example:
// STRWui %w1, ...
More information about the llvm-commits
mailing list