[llvm] [StackSlotColoring] Ignore non-spill objects in RemoveDeadStores. (PR #80242)

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Wed Jan 31 21:15:27 PST 2024


https://github.com/topperc created https://github.com/llvm/llvm-project/pull/80242

The stack slot coloring pass is concerned with optimizing spill
slots. If any change is a pass is made over the function to remove
stack stores that use the same register and stack slot as an
immediately preceding load.
    
The register check is too simple for constant registers like AArch64
and RISC-V's zero register. This register can be used as the result
of a load if we want to discard the result, but still have the memory
access performed. Like for a volatile or atomic load.
    
If the code sees a load from the zero register followed by a store
of the zero register at the same stack slot, the pass mistakenly
believes the store isn't needed.
    
Since the main stack coloring optimization is only concerned with
spill slots, it seems reasonable that RemoveDeadStores should
only be concerned with spills. Since we never generate a reload of
x0, this avoids the issue seen by RISC-V.
    
Test case concept is adapted from pr30821.mir from X86. That test
had to be updated to mark the stack slot as a spill slot.
    
Fixes #80052.

>From 19f19160e556d2ab35b579ab5b12de8fa1a0b575 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Wed, 31 Jan 2024 19:53:29 -0800
Subject: [PATCH 1/2] [RISCV] Add test case for PR80052. NFC

---
 llvm/test/CodeGen/RISCV/pr80052.mir | 121 ++++++++++++++++++++++++++++
 1 file changed, 121 insertions(+)
 create mode 100644 llvm/test/CodeGen/RISCV/pr80052.mir

diff --git a/llvm/test/CodeGen/RISCV/pr80052.mir b/llvm/test/CodeGen/RISCV/pr80052.mir
new file mode 100644
index 0000000000000..eb12968f39939
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/pr80052.mir
@@ -0,0 +1,121 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
+# RUN: llc %s -mtriple=riscv64 -run-pass=greedy,virtregrewriter,stack-slot-coloring -o - | FileCheck %s
+
+--- |
+  define void @foo() {
+  entry:
+    ; Dummy test just to hold the alloca
+    %alpha = alloca i32, i32 0, align 4
+    ret void
+  }
+
+...
+---
+name:            foo
+alignment:       4
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment:    4
+  localFrameSize:  4
+stack:
+  - { id: 0, name: alpha, size: 1, alignment: 4, local-offset: -4 }
+machineFunctionInfo:
+  varArgsFrameIndex: 0
+  varArgsSaveSize: 0
+body:             |
+  bb.0.entry:
+    ; To trick stack-slot-colouring to run its dead-store-elimination phase,
+    ; which is at fault, we need the register allocator to run, and spill in two
+    ; places that can have their slots merged. Achieve this by volatile-loading
+    ; data into all allocatable GPRs except $x31. Then, volatile storing them
+    ; later, leaving regalloc only $x31 to play with in the middle.
+    $x1 = LW %stack.0.alpha, 0 :: (volatile load (s32) from %ir.alpha)
+    $x5 = LW %stack.0.alpha, 0 :: (volatile load (s32) from %ir.alpha)
+    $x6 = LW %stack.0.alpha, 0 :: (volatile load (s32) from %ir.alpha)
+    $x7 = LW %stack.0.alpha, 0 :: (volatile load (s32) from %ir.alpha)
+    $x8 = LW %stack.0.alpha, 0 :: (volatile load (s32) from %ir.alpha)
+    $x9 = LW %stack.0.alpha, 0 :: (volatile load (s32) from %ir.alpha)
+    $x10 = LW %stack.0.alpha, 0 :: (volatile load (s32) from %ir.alpha)
+    $x11 = LW %stack.0.alpha, 0 :: (volatile load (s32) from %ir.alpha)
+    $x12 = LW %stack.0.alpha, 0 :: (volatile load (s32) from %ir.alpha)
+    $x13 = LW %stack.0.alpha, 0 :: (volatile load (s32) from %ir.alpha)
+    $x14 = LW %stack.0.alpha, 0 :: (volatile load (s32) from %ir.alpha)
+    $x15 = LW %stack.0.alpha, 0 :: (volatile load (s32) from %ir.alpha)
+    $x16 = LW %stack.0.alpha, 0 :: (volatile load (s32) from %ir.alpha)
+    $x17 = LW %stack.0.alpha, 0 :: (volatile load (s32) from %ir.alpha)
+    $x18 = LW %stack.0.alpha, 0 :: (volatile load (s32) from %ir.alpha)
+    $x19 = LW %stack.0.alpha, 0 :: (volatile load (s32) from %ir.alpha)
+    $x20 = LW %stack.0.alpha, 0 :: (volatile load (s32) from %ir.alpha)
+    $x21 = LW %stack.0.alpha, 0 :: (volatile load (s32) from %ir.alpha)
+    $x22 = LW %stack.0.alpha, 0 :: (volatile load (s32) from %ir.alpha)
+    $x23 = LW %stack.0.alpha, 0 :: (volatile load (s32) from %ir.alpha)
+    $x24 = LW %stack.0.alpha, 0 :: (volatile load (s32) from %ir.alpha)
+    $x25 = LW %stack.0.alpha, 0 :: (volatile load (s32) from %ir.alpha)
+    $x26 = LW %stack.0.alpha, 0 :: (volatile load (s32) from %ir.alpha)
+    $x27 = LW %stack.0.alpha, 0 :: (volatile load (s32) from %ir.alpha)
+    $x28 = LW %stack.0.alpha, 0 :: (volatile load (s32) from %ir.alpha)
+    $x29 = LW %stack.0.alpha, 0 :: (volatile load (s32) from %ir.alpha)
+    $x30 = LW %stack.0.alpha, 0 :: (volatile load (s32) from %ir.alpha)
+
+    ; Force the first spill.
+    %0:gpr = LW %stack.0.alpha, 0 :: (volatile load (s32) from %ir.alpha)
+    %1:gpr = LW %stack.0.alpha, 0 :: (volatile load (s32) from %ir.alpha)
+    SW %1, %stack.0.alpha, 0 :: (volatile store (s32) into %ir.alpha)
+    SW %0, %stack.0.alpha, 0 :: (volatile store (s32) into %ir.alpha)
+
+    ; CHECK: renamable $x31 = LW %stack.0.alpha, 0 :: (volatile load (s32) from %ir.alpha)
+    ; CHECK-NEXT: SD killed renamable $x31, %stack.1, 0 :: (store (s64) into %stack.1)
+    ; CHECK-NEXT: renamable $x31 = LD %stack.1, 0 :: (load (s64) from %stack.1)
+    ; CHECK-NEXT: SW killed renamable $x31, %stack.0.alpha, 0 :: (volatile store (s32) into %ir.alpha)
+
+    ; stack-slot-coloring doesn't know that a write to $x0 is discarded.
+    dead $x0 = LW %stack.0.alpha, 0 :: (volatile load (s32) from %ir.alpha)
+    ; This stores 0 rather than the result of the preceding load since $x0
+    ; is special.
+    ; We don't want this store to be deleted.
+    SW $x0, %stack.0.alpha, 0 :: (volatile store (s32) into %ir.alpha)
+
+    ; CHECK-NEXT: dead $x0 = LW %stack.0.alpha, 0 :: (volatile load (s32) from %ir.alpha)
+    ; There should be a store of $x0 here.
+
+    ; Force a second spill
+    %2:gpr = LW %stack.0.alpha, 0 :: (volatile load (s32) from %ir.alpha)
+    %3:gpr = LW %stack.0.alpha, 0 :: (volatile load (s32) from %ir.alpha)
+    SW %3, %stack.0.alpha, 0 :: (volatile store (s32) into %ir.alpha)
+    SW %2, %stack.0.alpha, 0 :: (volatile store (s32) into %ir.alpha)
+
+    ; CHECK-NEXT: renamable $x31 = LW %stack.0.alpha, 0 :: (volatile load (s32) from %ir.alpha)
+    ; CHECK-NEXT: SD killed renamable $x31, %stack.1, 0 :: (store (s64) into %stack.1)
+    ; CHECK-NEXT: renamable $x31 = LD %stack.1, 0 :: (load (s64) from %stack.1)
+    ; CHECK-NEXT: SW killed renamable $x31, %stack.0.alpha, 0 :: (volatile store (s32) into %ir.alpha)
+
+    SW $x1, %stack.0.alpha, 0 :: (volatile store (s32) into %ir.alpha)
+    SW $x5, %stack.0.alpha, 0 :: (volatile store (s32) into %ir.alpha)
+    SW $x6, %stack.0.alpha, 0 :: (volatile store (s32) into %ir.alpha)
+    SW $x7, %stack.0.alpha, 0 :: (volatile store (s32) into %ir.alpha)
+    SW $x8, %stack.0.alpha, 0 :: (volatile store (s32) into %ir.alpha)
+    SW $x9, %stack.0.alpha, 0 :: (volatile store (s32) into %ir.alpha)
+    SW $x10, %stack.0.alpha, 0 :: (volatile store (s32) into %ir.alpha)
+    SW $x11, %stack.0.alpha, 0 :: (volatile store (s32) into %ir.alpha)
+    SW $x12, %stack.0.alpha, 0 :: (volatile store (s32) into %ir.alpha)
+    SW $x13, %stack.0.alpha, 0 :: (volatile store (s32) into %ir.alpha)
+    SW $x14, %stack.0.alpha, 0 :: (volatile store (s32) into %ir.alpha)
+    SW $x15, %stack.0.alpha, 0 :: (volatile store (s32) into %ir.alpha)
+    SW $x16, %stack.0.alpha, 0 :: (volatile store (s32) into %ir.alpha)
+    SW $x17, %stack.0.alpha, 0 :: (volatile store (s32) into %ir.alpha)
+    SW $x18, %stack.0.alpha, 0 :: (volatile store (s32) into %ir.alpha)
+    SW $x19, %stack.0.alpha, 0 :: (volatile store (s32) into %ir.alpha)
+    SW $x20, %stack.0.alpha, 0 :: (volatile store (s32) into %ir.alpha)
+    SW $x21, %stack.0.alpha, 0 :: (volatile store (s32) into %ir.alpha)
+    SW $x22, %stack.0.alpha, 0 :: (volatile store (s32) into %ir.alpha)
+    SW $x23, %stack.0.alpha, 0 :: (volatile store (s32) into %ir.alpha)
+    SW $x24, %stack.0.alpha, 0 :: (volatile store (s32) into %ir.alpha)
+    SW $x25, %stack.0.alpha, 0 :: (volatile store (s32) into %ir.alpha)
+    SW $x26, %stack.0.alpha, 0 :: (volatile store (s32) into %ir.alpha)
+    SW $x27, %stack.0.alpha, 0 :: (volatile store (s32) into %ir.alpha)
+    SW $x28, %stack.0.alpha, 0 :: (volatile store (s32) into %ir.alpha)
+    SW $x29, %stack.0.alpha, 0 :: (volatile store (s32) into %ir.alpha)
+    SW $x30, %stack.0.alpha, 0 :: (volatile store (s32) into %ir.alpha)
+    PseudoRET
+
+...

>From 739901b7d771693f94a9cd14dbd71aaed1d22f5f Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Wed, 31 Jan 2024 20:41:23 -0800
Subject: [PATCH 2/2] [StackSlotColoring] Ignore non-spill objects in
 RemoveDeadStores.

The stack slot coloring pass is concerned with optimizing spill
slots. If any change is a pass is made over the function to remove
stack stores that use the same register and stack slot as an
immediately preceding load.

The register check is too simple for constant registers like AArch64
and RISC-V's zero register. This register can be used as the result
of a load if we want to discard the result, but still have the memory
access performed. Like for a volatile or atomic load.

If the code sees a load from the zero register followed by a store
of the zero register at the same stack slot, the pass mistakenly
believes the store isn't needed.

Since the main stack coloring optimization is only concerned with
spill slots, it seems reasonable that RemoveDeadStores should
only be concerned with spills. Since we never generate a reload of
x0, this avoids the issue seen by RISC-V.

Test case concept is adapted from pr30821.mir from X86. That test
had to be updated to mark the stack slot as a spill slot.

Fixes #80052.
---
 llvm/lib/CodeGen/StackSlotColoring.cpp | 2 +-
 llvm/test/CodeGen/RISCV/pr80052.mir    | 6 +++++-
 llvm/test/CodeGen/X86/pr30821.mir      | 2 +-
 3 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/CodeGen/StackSlotColoring.cpp b/llvm/lib/CodeGen/StackSlotColoring.cpp
index c180f4d8f0365..6d3fc740b292a 100644
--- a/llvm/lib/CodeGen/StackSlotColoring.cpp
+++ b/llvm/lib/CodeGen/StackSlotColoring.cpp
@@ -480,7 +480,7 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) {
     if (!(StoreReg = TII->isStoreToStackSlot(*NextMI, SecondSS, StoreSize)))
       continue;
     if (FirstSS != SecondSS || LoadReg != StoreReg || FirstSS == -1 ||
-        LoadSize != StoreSize)
+        LoadSize != StoreSize || !MFI->isSpillSlotObjectIndex(FirstSS))
       continue;
 
     ++NumDead;
diff --git a/llvm/test/CodeGen/RISCV/pr80052.mir b/llvm/test/CodeGen/RISCV/pr80052.mir
index eb12968f39939..8006697c3bf27 100644
--- a/llvm/test/CodeGen/RISCV/pr80052.mir
+++ b/llvm/test/CodeGen/RISCV/pr80052.mir
@@ -65,6 +65,8 @@ body:             |
 
     ; CHECK: renamable $x31 = LW %stack.0.alpha, 0 :: (volatile load (s32) from %ir.alpha)
     ; CHECK-NEXT: SD killed renamable $x31, %stack.1, 0 :: (store (s64) into %stack.1)
+    ; CHECK-NEXT: renamable $x31 = LW %stack.0.alpha, 0 :: (volatile load (s32) from %ir.alpha)
+    ; CHECK-NEXT: SW killed renamable $x31, %stack.0.alpha, 0 :: (volatile store (s32) into %ir.alpha)
     ; CHECK-NEXT: renamable $x31 = LD %stack.1, 0 :: (load (s64) from %stack.1)
     ; CHECK-NEXT: SW killed renamable $x31, %stack.0.alpha, 0 :: (volatile store (s32) into %ir.alpha)
 
@@ -76,7 +78,7 @@ body:             |
     SW $x0, %stack.0.alpha, 0 :: (volatile store (s32) into %ir.alpha)
 
     ; CHECK-NEXT: dead $x0 = LW %stack.0.alpha, 0 :: (volatile load (s32) from %ir.alpha)
-    ; There should be a store of $x0 here.
+    ; CHECK-NEXT: SW $x0, %stack.0.alpha, 0 :: (volatile store (s32) into %ir.alpha)
 
     ; Force a second spill
     %2:gpr = LW %stack.0.alpha, 0 :: (volatile load (s32) from %ir.alpha)
@@ -86,6 +88,8 @@ body:             |
 
     ; CHECK-NEXT: renamable $x31 = LW %stack.0.alpha, 0 :: (volatile load (s32) from %ir.alpha)
     ; CHECK-NEXT: SD killed renamable $x31, %stack.1, 0 :: (store (s64) into %stack.1)
+    ; CHECK-NEXT: renamable $x31 = LW %stack.0.alpha, 0 :: (volatile load (s32) from %ir.alpha)
+    ; CHECK-NEXT: SW killed renamable $x31, %stack.0.alpha, 0 :: (volatile store (s32) into %ir.alpha)
     ; CHECK-NEXT: renamable $x31 = LD %stack.1, 0 :: (load (s64) from %stack.1)
     ; CHECK-NEXT: SW killed renamable $x31, %stack.0.alpha, 0 :: (volatile store (s32) into %ir.alpha)
 
diff --git a/llvm/test/CodeGen/X86/pr30821.mir b/llvm/test/CodeGen/X86/pr30821.mir
index 9591d45e7baff..992ef8bbe55f0 100644
--- a/llvm/test/CodeGen/X86/pr30821.mir
+++ b/llvm/test/CodeGen/X86/pr30821.mir
@@ -51,7 +51,7 @@ stack:
   - { id: 1, name: foxtrot, type: default, offset: 0, size: 16, alignment: 16,
       stack-id: default, callee-saved-register: '', callee-saved-restored: true,
       debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
-  - { id: 2, name: india, type: default, offset: 0, size: 16, alignment: 16,
+  - { id: 2, name: india, type: spill-slot, offset: 0, size: 16, alignment: 16,
       stack-id: default, callee-saved-register: '', callee-saved-restored: true,
       debug-info-variable: '', debug-info-expression: '',
       debug-info-location: '' }



More information about the llvm-commits mailing list