[llvm] 1f54ef7 - [AArch64] Only clear kill flags if necessary when merging str (#69680)

via llvm-commits llvm-commits at lists.llvm.org
Thu Nov 2 17:03:26 PDT 2023


Author: Zhaoxuan Jiang
Date: 2023-11-02T17:03:21-07:00
New Revision: 1f54ef78d57b374dbc86a76419b0ced5162c5bdc

URL: https://github.com/llvm/llvm-project/commit/1f54ef78d57b374dbc86a76419b0ced5162c5bdc
DIFF: https://github.com/llvm/llvm-project/commit/1f54ef78d57b374dbc86a76419b0ced5162c5bdc.diff

LOG: [AArch64] Only clear kill flags if necessary when merging str (#69680)

Previously, the kill flags of the source registers were unconditionally
cleared when a pair of `str` instructions was merged, which resulted in
suboptimal register allocation and inhibited some renaming opportunities
that could allow further merging of `str` instructions.

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
    llvm/test/CodeGen/AArch64/irg-nomem.mir
    llvm/test/CodeGen/AArch64/ldrpre-ldr-merge.mir
    llvm/test/CodeGen/AArch64/ldst-opt-aa.mir
    llvm/test/CodeGen/AArch64/stp-opt-with-renaming-debug.mir
    llvm/test/CodeGen/AArch64/sve-fixed-length-fp128.ll
    llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/aarch64_generated_funcs.ll.generated.expected

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index 4a7805719bc5729..299ea43a539edb7 100644
--- a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -1017,15 +1017,23 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
   MachineBasicBlock *MBB = I->getParent();
   MachineOperand RegOp0 = getLdStRegOp(*RtMI);
   MachineOperand RegOp1 = getLdStRegOp(*Rt2MI);
+  MachineOperand &PairedRegOp = RtMI == &*Paired ? RegOp0 : RegOp1;
   // Kill flags may become invalid when moving stores for pairing.
   if (RegOp0.isUse()) {
     if (!MergeForward) {
       // Clear kill flags on store if moving upwards. Example:
-      //   STRWui %w0, ...
+      //   STRWui kill %w0, ...
       //   USE %w1
       //   STRWui kill %w1  ; need to clear kill flag when moving STRWui upwards
-      RegOp0.setIsKill(false);
-      RegOp1.setIsKill(false);
+      // We are about to move the store of w1, so its kill flag may become
+      // invalid; not the case for w0.
+      // Since w1 is used between the stores, the kill flag on w1 is cleared
+      // after merging.
+      //   STPWi kill %w0, %w1, ...
+      //   USE %w1
+      for (auto It = std::next(I); It != Paired && PairedRegOp.isKill(); ++It)
+        if (It->readsRegister(PairedRegOp.getReg(), TRI))
+          PairedRegOp.setIsKill(false);
     } else {
       // Clear kill flags of the first stores register. Example:
       //   STRWui %w1, ...

diff  --git a/llvm/test/CodeGen/AArch64/irg-nomem.mir b/llvm/test/CodeGen/AArch64/irg-nomem.mir
index d428f16011a700e..bc247b0dbf9e307 100644
--- a/llvm/test/CodeGen/AArch64/irg-nomem.mir
+++ b/llvm/test/CodeGen/AArch64/irg-nomem.mir
@@ -62,7 +62,7 @@ body:             |
     $w9 = MOVZWi 1, 0, implicit-def $x9
 
     ; Check that stores are merged across IRG.
-    ; CHECK: STPXi renamable $x9, renamable $x9, renamable $x0, 0
+    ; CHECK: STPXi renamable $x9, killed renamable $x9, renamable $x0, 0
 
     STRXui renamable $x9, renamable $x0, 0 :: (store (s64) into %ir.x)
     dead renamable $x10 = IRG renamable $x8, $xzr

diff  --git a/llvm/test/CodeGen/AArch64/ldrpre-ldr-merge.mir b/llvm/test/CodeGen/AArch64/ldrpre-ldr-merge.mir
index 647130ba7d908fc..b76570da781f5a6 100644
--- a/llvm/test/CodeGen/AArch64/ldrpre-ldr-merge.mir
+++ b/llvm/test/CodeGen/AArch64/ldrpre-ldr-merge.mir
@@ -17,7 +17,7 @@ body:             |
     ; CHECK: liveins: $w0, $w1, $x1
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: early-clobber $x1, renamable $w0, renamable $w2 = LDPWpre renamable $x1, 5 :: (load (s32))
-    ; CHECK-NEXT: STPWi renamable $w0, renamable $w2, renamable $x1, 0 :: (store (s32))
+    ; CHECK-NEXT: STPWi renamable $w0, killed renamable $w2, renamable $x1, 0 :: (store (s32))
     ; CHECK-NEXT: RET undef $lr
     early-clobber renamable $x1, renamable $w0 = LDRWpre killed renamable $x1, 20 :: (load (s32))
     renamable $w2 = LDRWui renamable $x1, 1 :: (load (s32))

diff  --git a/llvm/test/CodeGen/AArch64/ldst-opt-aa.mir b/llvm/test/CodeGen/AArch64/ldst-opt-aa.mir
index 9f2680aca970e9d..16369af9ccc6dd3 100644
--- a/llvm/test/CodeGen/AArch64/ldst-opt-aa.mir
+++ b/llvm/test/CodeGen/AArch64/ldst-opt-aa.mir
@@ -15,7 +15,7 @@
 ---
 # CHECK-LABEL: name: ldr_str_aa
 # CHECK: $w8, $w9 = LDPWi $x1, 0
-# CHECK: STPWi $w8, $w9, $x0, 0
+# CHECK: STPWi killed $w8, killed $w9, $x0, 0
 name:            ldr_str_aa
 tracksRegLiveness: true
 body:             |

diff  --git a/llvm/test/CodeGen/AArch64/stp-opt-with-renaming-debug.mir b/llvm/test/CodeGen/AArch64/stp-opt-with-renaming-debug.mir
index 2c24383e6baa349..41d84a0d1cb079c 100644
--- a/llvm/test/CodeGen/AArch64/stp-opt-with-renaming-debug.mir
+++ b/llvm/test/CodeGen/AArch64/stp-opt-with-renaming-debug.mir
@@ -61,7 +61,7 @@ body:             |
 # CHECK-NEXT:  DBG_VALUE $x0, $noreg,
 # CHECK-NEXT:  STRXui killed renamable $x8, renamable $x19, 2 :: (store (s64))
 # CHECK-NEXT:  $x8 = ADDXrs renamable $x0, killed renamable $x20, 0
-# CHECK-NEXT:  STPXi $xzr, renamable $x8, renamable $x19, 0 :: (store (s64))
+# CHECK-NEXT:  STPXi $xzr, killed renamable $x8, renamable $x19, 0 :: (store (s64))
 # CHECK-NEXT:  RET undef $lr, implicit $x0
 name:            test_dbg_value2
 alignment:       4

diff  --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp128.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp128.ll
index 7ac1b58c626d191..da7e772461e28bc 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp128.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp128.ll
@@ -38,10 +38,10 @@ define void @fcvt_v4f64_v4f128(ptr %a, ptr %b) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    fmov d0, d1
 ; CHECK-NEXT:    bl __extenddftf2
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    ldr q2, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    stp q1, q0, [x19]
+; CHECK-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT:    stp q0, q2, [x19, #32]
+; CHECK-NEXT:    stp q0, q1, [x19, #32]
 ; CHECK-NEXT:    addvl sp, sp, #2
 ; CHECK-NEXT:    add sp, sp, #48
 ; CHECK-NEXT:    ldp x30, x19, [sp, #16] // 16-byte Folded Reload

diff  --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/aarch64_generated_funcs.ll.generated.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/aarch64_generated_funcs.ll.generated.expected
index 110fa4d9b1cf1f4..97b17d98d347209 100644
--- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/aarch64_generated_funcs.ll.generated.expected
+++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/aarch64_generated_funcs.ll.generated.expected
@@ -133,8 +133,8 @@ attributes #0 = { noredzone nounwind ssp uwtable "frame-pointer"="all" }
 ; CHECK-LABEL: OUTLINED_FUNCTION_0:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w9, #2 // =0x2
-; CHECK-NEXT:    mov w10, #3 // =0x3
 ; CHECK-NEXT:    stp w9, w8, [x29, #-12]
+; CHECK-NEXT:    mov w9, #3 // =0x3
 ; CHECK-NEXT:    mov w8, #4 // =0x4
-; CHECK-NEXT:    stp w8, w10, [sp, #12]
+; CHECK-NEXT:    stp w8, w9, [sp, #12]
 ; CHECK-NEXT:    ret


        


More information about the llvm-commits mailing list