[llvm] [MachineSink] Clear kill flags of sunk addressing mode registers (PR #75072)

Momchil Velikov via llvm-commits llvm-commits at lists.llvm.org
Tue Dec 12 03:31:38 PST 2023


https://github.com/momchil-velikov updated https://github.com/llvm/llvm-project/pull/75072

>From e9fcb408f9fb6106502ca3ff3c8863b825367994 Mon Sep 17 00:00:00 2001
From: Momchil Velikov <momchil.velikov at arm.com>
Date: Mon, 11 Dec 2023 16:31:35 +0000
Subject: [PATCH 1/3] [MachineSink] Clear kill flags of sunk addressing mode
 registers

When doing sink-and-fold, the MachineSink clears the "killed" flags
of the operands of the sunk (and deleted) instruction. However, this is
not always sufficient. In some cases we can create the new load/store
instruction with operands other than the ones present in the deleted
instruction. One such example is folding a zero word extend into
a memory load on AArch64. The zero-extend is represented by a pair
of instructions - `MOV` (i.e. `ORRwrs`) followed by a `SUBREG_TO_REG`.
The `SUBREG_TO_REG` is deleted (it is the sunk instruction), but the new
load instruction mentions operands "killed" in the `MOV`, which is no
longer correct.

To fix this, clear the "killed" flags of the registers paritipating in
the adressing mode.
---
 llvm/lib/CodeGen/MachineSink.cpp              |  13 +-
 .../sink-and-fold-clear-kill-flags.mir        | 134 ++++++++++++++++++
 2 files changed, 142 insertions(+), 5 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/sink-and-fold-clear-kill-flags.mir

diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp
index 83d775055dfd73..6441b456cc39d1 100644
--- a/llvm/lib/CodeGen/MachineSink.cpp
+++ b/llvm/lib/CodeGen/MachineSink.cpp
@@ -500,11 +500,6 @@ bool MachineSinking::PerformSinkAndFold(MachineInstr &MI,
     return false;
 
   // Now we know we can fold the instruction in all its users.
-  if (UsedRegA)
-    MRI->clearKillFlags(UsedRegA);
-  if (UsedRegB)
-    MRI->clearKillFlags(UsedRegB);
-
   for (auto &[SinkDst, MaybeAM] : SinkInto) {
     MachineInstr *New = nullptr;
     LLVM_DEBUG(dbgs() << "Sinking copy of"; MI.dump(); dbgs() << "into";
@@ -530,6 +525,14 @@ bool MachineSinking::PerformSinkAndFold(MachineInstr &MI,
     } else {
       // Fold instruction into the addressing mode of a memory instruction.
       New = TII->emitLdStWithAddr(*SinkDst, MaybeAM);
+
+      // The registers of the addressing mode may have their live range extended
+      // and their kill flags may no longer be correct. Conservatively clear the
+      // kill flags.
+      if (Register R = MaybeAM.BaseReg; R.isValid() && R.isVirtual())
+        MRI->clearKillFlags(R);
+      if (Register R = MaybeAM.ScaledReg; R.isValid() && R.isVirtual())
+        MRI->clearKillFlags(R);
     }
     LLVM_DEBUG(dbgs() << "yielding"; New->dump());
     // Clear the StoreInstrCache, since we may invalidate it by erasing.
diff --git a/llvm/test/CodeGen/AArch64/sink-and-fold-clear-kill-flags.mir b/llvm/test/CodeGen/AArch64/sink-and-fold-clear-kill-flags.mir
new file mode 100644
index 00000000000000..effc346848b757
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sink-and-fold-clear-kill-flags.mir
@@ -0,0 +1,134 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
+# RUN: llc --run-pass=machine-sink %s -o - | FileCheck %s
+
+# Test that the "killed" flags is not present in the ORRWrs instruction below
+--- |
+  source_filename = "crash.ll"
+  target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+  target triple = "aarch64-linux"
+
+  define i32 @f(ptr %image, i32 %i) {
+  entry:
+    %add = add i32 %i, 1
+    %idx = zext i32 %add to i64
+    br label %A
+
+  A:                                                ; preds = %B, %A, %entry
+    %sunkaddr = getelementptr i8, ptr %image, i64 %idx
+    %0 = load i8, ptr %sunkaddr, align 1
+    %cmp153 = icmp eq i8 %0, 0
+    br i1 %cmp153, label %B, label %A
+
+  B:                                                ; preds = %A
+    store i32 0, ptr %image, align 1
+    br label %A
+  }
+
+...
+---
+name:            f
+alignment:       4
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: true
+hasWinCFI:       false
+callsEHReturn:   false
+callsUnwindInit: false
+hasEHCatchret:   false
+hasEHScopes:     false
+hasEHFunclets:   false
+isOutlined:      false
+debugInstrRef:   false
+failsVerification: false
+tracksDebugUserValues: false
+registers:
+  - { id: 0, class: gpr64, preferred-register: '' }
+  - { id: 1, class: gpr64common, preferred-register: '' }
+  - { id: 2, class: gpr32common, preferred-register: '' }
+  - { id: 3, class: gpr32common, preferred-register: '' }
+  - { id: 4, class: gpr32, preferred-register: '' }
+  - { id: 5, class: gpr32, preferred-register: '' }
+  - { id: 6, class: gpr32, preferred-register: '' }
+liveins:
+  - { reg: '$x0', virtual-reg: '%1' }
+  - { reg: '$w1', virtual-reg: '%2' }
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    1
+  adjustsStack:    false
+  hasCalls:        false
+  stackProtector:  ''
+  functionContext: ''
+  maxCallFrameSize: 0
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  hasTailCall:     false
+  localFrameSize:  0
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:      []
+stack:           []
+entry_values:    []
+callSites:       []
+debugValueSubstitutions: []
+constants:       []
+machineFunctionInfo: {}
+body:             |
+  ; CHECK-LABEL: name: f
+  ; CHECK: bb.0.entry:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $x0, $w1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gpr32common = COPY $w1
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:gpr64common = COPY $x0
+  ; CHECK-NEXT:   [[ADDWri:%[0-9]+]]:gpr32common = ADDWri [[COPY]], 1, 0
+  ; CHECK-NEXT:   [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, [[ADDWri]], 0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1.A:
+  ; CHECK-NEXT:   successors: %bb.2(0x30000000), %bb.1(0x50000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[LDRBBroW:%[0-9]+]]:gpr32 = LDRBBroW [[COPY1]], [[ADDWri]], 0, 0 :: (load (s8) from %ir.sunkaddr)
+  ; CHECK-NEXT:   CBNZW killed [[LDRBBroW]], %bb.1
+  ; CHECK-NEXT:   B %bb.2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2.B:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:gpr32 = COPY $wzr
+  ; CHECK-NEXT:   STRWui [[COPY2]], [[COPY1]], 0 :: (store (s32) into %ir.image, align 1)
+  ; CHECK-NEXT:   B %bb.1
+  bb.0.entry:
+    successors: %bb.1(0x80000000)
+    liveins: $x0, $w1
+
+    %2:gpr32common = COPY $w1
+    %1:gpr64common = COPY $x0
+    %3:gpr32common = ADDWri %2, 1, 0
+    %4:gpr32 = ORRWrs $wzr, killed %3, 0
+    %0:gpr64 = SUBREG_TO_REG 0, killed %4, %subreg.sub_32
+
+  bb.1.A:
+    successors: %bb.2(0x30000000), %bb.1(0x50000000)
+
+    %5:gpr32 = LDRBBroX %1, %0, 0, 0 :: (load (s8) from %ir.sunkaddr)
+    CBNZW killed %5, %bb.1
+    B %bb.2
+
+  bb.2.B:
+    successors: %bb.1(0x80000000)
+
+    %6:gpr32 = COPY $wzr
+    STRWui %6, %1, 0 :: (store (s32) into %ir.image, align 1)
+    B %bb.1
+
+...

>From 3e487cc634ff4fe7744e10678cf1b4f8731b5115 Mon Sep 17 00:00:00 2001
From: Momchil Velikov <momchil.velikov at arm.com>
Date: Mon, 11 Dec 2023 22:15:40 +0000
Subject: [PATCH 2/3] [fixup] Remove some cruft from a test

---
 .../sink-and-fold-clear-kill-flags.mir        | 43 -------------------
 1 file changed, 43 deletions(-)

diff --git a/llvm/test/CodeGen/AArch64/sink-and-fold-clear-kill-flags.mir b/llvm/test/CodeGen/AArch64/sink-and-fold-clear-kill-flags.mir
index effc346848b757..732af0890d7184 100644
--- a/llvm/test/CodeGen/AArch64/sink-and-fold-clear-kill-flags.mir
+++ b/llvm/test/CodeGen/AArch64/sink-and-fold-clear-kill-flags.mir
@@ -28,22 +28,7 @@
 ---
 name:            f
 alignment:       4
-exposesReturnsTwice: false
-legalized:       false
-regBankSelected: false
-selected:        false
-failedISel:      false
 tracksRegLiveness: true
-hasWinCFI:       false
-callsEHReturn:   false
-callsUnwindInit: false
-hasEHCatchret:   false
-hasEHScopes:     false
-hasEHFunclets:   false
-isOutlined:      false
-debugInstrRef:   false
-failsVerification: false
-tracksDebugUserValues: false
 registers:
   - { id: 0, class: gpr64, preferred-register: '' }
   - { id: 1, class: gpr64common, preferred-register: '' }
@@ -55,34 +40,6 @@ registers:
 liveins:
   - { reg: '$x0', virtual-reg: '%1' }
   - { reg: '$w1', virtual-reg: '%2' }
-frameInfo:
-  isFrameAddressTaken: false
-  isReturnAddressTaken: false
-  hasStackMap:     false
-  hasPatchPoint:   false
-  stackSize:       0
-  offsetAdjustment: 0
-  maxAlignment:    1
-  adjustsStack:    false
-  hasCalls:        false
-  stackProtector:  ''
-  functionContext: ''
-  maxCallFrameSize: 0
-  cvBytesOfCalleeSavedRegisters: 0
-  hasOpaqueSPAdjustment: false
-  hasVAStart:      false
-  hasMustTailInVarArgFunc: false
-  hasTailCall:     false
-  localFrameSize:  0
-  savePoint:       ''
-  restorePoint:    ''
-fixedStack:      []
-stack:           []
-entry_values:    []
-callSites:       []
-debugValueSubstitutions: []
-constants:       []
-machineFunctionInfo: {}
 body:             |
   ; CHECK-LABEL: name: f
   ; CHECK: bb.0.entry:

>From d23a5e40dfcdfb34c60e416c6ef3ee37248f43a2 Mon Sep 17 00:00:00 2001
From: Momchil Velikov <momchil.velikov at arm.com>
Date: Tue, 12 Dec 2023 11:28:31 +0000
Subject: [PATCH 3/3] [fixup] Clear "killed" flags when sinking into a copy as
 well

---
 llvm/lib/CodeGen/MachineSink.cpp              |   8 ++
 .../sink-and-fold-clear-kill-flags.mir        | 104 +++++++++++++++++-
 2 files changed, 111 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp
index 6441b456cc39d1..e7e8f602683480 100644
--- a/llvm/lib/CodeGen/MachineSink.cpp
+++ b/llvm/lib/CodeGen/MachineSink.cpp
@@ -522,6 +522,14 @@ bool MachineSinking::PerformSinkAndFold(MachineInstr &MI,
       New = &*std::prev(InsertPt);
       if (!New->getDebugLoc())
         New->setDebugLoc(SinkDst->getDebugLoc());
+
+      // The operand registers of the "sunk" instruction have their live range
+      // extended and their kill flags may no longer be correct. Conservatively
+      // clear the kill flags.
+      if (UsedRegA)
+        MRI->clearKillFlags(UsedRegA);
+      if (UsedRegB)
+        MRI->clearKillFlags(UsedRegB);
     } else {
       // Fold instruction into the addressing mode of a memory instruction.
       New = TII->emitLdStWithAddr(*SinkDst, MaybeAM);
diff --git a/llvm/test/CodeGen/AArch64/sink-and-fold-clear-kill-flags.mir b/llvm/test/CodeGen/AArch64/sink-and-fold-clear-kill-flags.mir
index 732af0890d7184..1303776230f1a3 100644
--- a/llvm/test/CodeGen/AArch64/sink-and-fold-clear-kill-flags.mir
+++ b/llvm/test/CodeGen/AArch64/sink-and-fold-clear-kill-flags.mir
@@ -1,7 +1,9 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
 # RUN: llc --run-pass=machine-sink %s -o - | FileCheck %s
 
-# Test that the "killed" flags is not present in the ORRWrs instruction below
+# Test that the "killed" flags are cleared in the ORRWrs and SUBSWrr  instructions
+# in 'f and @g, respectively
+
 --- |
   source_filename = "crash.ll"
   target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
@@ -24,6 +26,24 @@
     br label %A
   }
 
+  define i32 @g(i32 %i, i32 %j) {
+  entry:
+    %add = add i32 %i, %j
+    %neg = sub i32 0, %i
+    br label %A
+
+  A:                                                ; preds = %B, %A, %entry
+    %0 = call i8 @h(i32 %add)
+    %c = icmp eq i8 %0, 0
+    br i1 %c, label %B, label %A
+
+  B:                                                ; preds = %A
+    %1 = call i8 @h(i32 %neg)
+    br label %A
+  }
+
+  declare i8 @h(i32)
+
 ...
 ---
 name:            f
@@ -87,5 +107,87 @@ body:             |
     %6:gpr32 = COPY $wzr
     STRWui %6, %1, 0 :: (store (s32) into %ir.image, align 1)
     B %bb.1
+...
+---
+name:            g
+alignment:       4
+registers:
+  - { id: 0, class: gpr32all, preferred-register: '' }
+  - { id: 1, class: gpr32all, preferred-register: '' }
+  - { id: 2, class: gpr32, preferred-register: '' }
+  - { id: 3, class: gpr32, preferred-register: '' }
+  - { id: 4, class: gpr32, preferred-register: '' }
+  - { id: 5, class: gpr32, preferred-register: '' }
+  - { id: 6, class: gpr32, preferred-register: '' }
+  - { id: 7, class: gpr32, preferred-register: '' }
+  - { id: 8, class: gpr32common, preferred-register: '' }
+  - { id: 9, class: gpr32all, preferred-register: '' }
+liveins:
+  - { reg: '$w0', virtual-reg: '%2' }
+  - { reg: '$w1', virtual-reg: '%3' }
+body:             |
+  ; CHECK-LABEL: name: g
+  ; CHECK: bb.0.entry:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $w0, $w1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gpr32 = COPY $w1
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:gpr32 = COPY $w0
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:gpr32 = COPY $wzr
+  ; CHECK-NEXT:   [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr [[COPY2]], [[COPY1]], implicit-def dead $nzcv
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:gpr32all = COPY [[SUBSWrr]]
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1.A:
+  ; CHECK-NEXT:   successors: %bb.2(0x30000000), %bb.1(0x50000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+  ; CHECK-NEXT:   $w0 = ADDWrr [[COPY1]], [[COPY]]
+  ; CHECK-NEXT:   BL @h, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $w0, implicit-def $sp, implicit-def $w0
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:gpr32 = COPY $w0
+  ; CHECK-NEXT:   $wzr = ANDSWri [[COPY4]], 7, implicit-def $nzcv
+  ; CHECK-NEXT:   Bcc 1, %bb.1, implicit $nzcv
+  ; CHECK-NEXT:   B %bb.2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2.B:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+  ; CHECK-NEXT:   $w0 = COPY [[COPY3]]
+  ; CHECK-NEXT:   BL @h, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $w0, implicit-def $sp, implicit-def $w0
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+  ; CHECK-NEXT:   B %bb.1
+  bb.0.entry:
+    successors: %bb.1(0x80000000)
+    liveins: $w0, $w1
+
+    %3:gpr32 = COPY $w1
+    %2:gpr32 = COPY $w0
+    %4:gpr32 = ADDWrr %2, killed %3
+    %0:gpr32all = COPY %4
+    %5:gpr32 = COPY $wzr
+    %6:gpr32 = SUBSWrr %5, killed %2, implicit-def dead $nzcv
+    %1:gpr32all = COPY %6
+
+  bb.1.A:
+    successors: %bb.2(0x30000000), %bb.1(0x50000000)
+
+    ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+    $w0 = COPY %0
+    BL @h, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $w0, implicit-def $sp, implicit-def $w0
+    ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+    %7:gpr32 = COPY $w0
+    $wzr = ANDSWri %7, 7, implicit-def $nzcv
+    Bcc 1, %bb.1, implicit $nzcv
+    B %bb.2
+
+  bb.2.B:
+    successors: %bb.1(0x80000000)
+
+    ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
+    $w0 = COPY %1
+    BL @h, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $w0, implicit-def $sp, implicit-def $w0
+    ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+    B %bb.1
 
 ...



More information about the llvm-commits mailing list