[llvm] [MachineSink] Clear kill flags of sunk addressing mode registers (PR #75072)
Momchil Velikov via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 11 14:21:11 PST 2023
https://github.com/momchil-velikov updated https://github.com/llvm/llvm-project/pull/75072
>From e9fcb408f9fb6106502ca3ff3c8863b825367994 Mon Sep 17 00:00:00 2001
From: Momchil Velikov <momchil.velikov at arm.com>
Date: Mon, 11 Dec 2023 16:31:35 +0000
Subject: [PATCH 1/2] [MachineSink] Clear kill flags of sunk addressing mode
registers
When doing sink-and-fold, the MachineSink clears the "killed" flags
of the operands of the sunk (and deleted) instruction. However, this is
not always sufficient. In some cases we can create the new load/store
instruction with operands other than the ones present in the deleted
instruction. One such example is folding a zero word extend into
a memory load on AArch64. The zero-extend is represented by a pair
of instructions - `MOV` (i.e. `ORRwrs`) followed by a `SUBREG_TO_REG`.
The `SUBREG_TO_REG` is deleted (it is the sunk instruction), but the new
load instruction mentions operands "killed" in the `MOV`, which is no
longer correct.
To fix this, clear the "killed" flags of the registers paritipating in
the adressing mode.
---
llvm/lib/CodeGen/MachineSink.cpp | 13 +-
.../sink-and-fold-clear-kill-flags.mir | 134 ++++++++++++++++++
2 files changed, 142 insertions(+), 5 deletions(-)
create mode 100644 llvm/test/CodeGen/AArch64/sink-and-fold-clear-kill-flags.mir
diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp
index 83d775055dfd7..6441b456cc39d 100644
--- a/llvm/lib/CodeGen/MachineSink.cpp
+++ b/llvm/lib/CodeGen/MachineSink.cpp
@@ -500,11 +500,6 @@ bool MachineSinking::PerformSinkAndFold(MachineInstr &MI,
return false;
// Now we know we can fold the instruction in all its users.
- if (UsedRegA)
- MRI->clearKillFlags(UsedRegA);
- if (UsedRegB)
- MRI->clearKillFlags(UsedRegB);
-
for (auto &[SinkDst, MaybeAM] : SinkInto) {
MachineInstr *New = nullptr;
LLVM_DEBUG(dbgs() << "Sinking copy of"; MI.dump(); dbgs() << "into";
@@ -530,6 +525,14 @@ bool MachineSinking::PerformSinkAndFold(MachineInstr &MI,
} else {
// Fold instruction into the addressing mode of a memory instruction.
New = TII->emitLdStWithAddr(*SinkDst, MaybeAM);
+
+ // The registers of the addressing mode may have their live range extended
+ // and their kill flags may no longer be correct. Conservatively clear the
+ // kill flags.
+ if (Register R = MaybeAM.BaseReg; R.isValid() && R.isVirtual())
+ MRI->clearKillFlags(R);
+ if (Register R = MaybeAM.ScaledReg; R.isValid() && R.isVirtual())
+ MRI->clearKillFlags(R);
}
LLVM_DEBUG(dbgs() << "yielding"; New->dump());
// Clear the StoreInstrCache, since we may invalidate it by erasing.
diff --git a/llvm/test/CodeGen/AArch64/sink-and-fold-clear-kill-flags.mir b/llvm/test/CodeGen/AArch64/sink-and-fold-clear-kill-flags.mir
new file mode 100644
index 0000000000000..effc346848b75
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sink-and-fold-clear-kill-flags.mir
@@ -0,0 +1,134 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
+# RUN: llc --run-pass=machine-sink %s -o - | FileCheck %s
+
+# Test that the "killed" flags is not present in the ORRWrs instruction below
+--- |
+ source_filename = "crash.ll"
+ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+ target triple = "aarch64-linux"
+
+ define i32 @f(ptr %image, i32 %i) {
+ entry:
+ %add = add i32 %i, 1
+ %idx = zext i32 %add to i64
+ br label %A
+
+ A: ; preds = %B, %A, %entry
+ %sunkaddr = getelementptr i8, ptr %image, i64 %idx
+ %0 = load i8, ptr %sunkaddr, align 1
+ %cmp153 = icmp eq i8 %0, 0
+ br i1 %cmp153, label %B, label %A
+
+ B: ; preds = %A
+ store i32 0, ptr %image, align 1
+ br label %A
+ }
+
+...
+---
+name: f
+alignment: 4
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+failedISel: false
+tracksRegLiveness: true
+hasWinCFI: false
+callsEHReturn: false
+callsUnwindInit: false
+hasEHCatchret: false
+hasEHScopes: false
+hasEHFunclets: false
+isOutlined: false
+debugInstrRef: false
+failsVerification: false
+tracksDebugUserValues: false
+registers:
+ - { id: 0, class: gpr64, preferred-register: '' }
+ - { id: 1, class: gpr64common, preferred-register: '' }
+ - { id: 2, class: gpr32common, preferred-register: '' }
+ - { id: 3, class: gpr32common, preferred-register: '' }
+ - { id: 4, class: gpr32, preferred-register: '' }
+ - { id: 5, class: gpr32, preferred-register: '' }
+ - { id: 6, class: gpr32, preferred-register: '' }
+liveins:
+ - { reg: '$x0', virtual-reg: '%1' }
+ - { reg: '$w1', virtual-reg: '%2' }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 1
+ adjustsStack: false
+ hasCalls: false
+ stackProtector: ''
+ functionContext: ''
+ maxCallFrameSize: 0
+ cvBytesOfCalleeSavedRegisters: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+ hasTailCall: false
+ localFrameSize: 0
+ savePoint: ''
+ restorePoint: ''
+fixedStack: []
+stack: []
+entry_values: []
+callSites: []
+debugValueSubstitutions: []
+constants: []
+machineFunctionInfo: {}
+body: |
+ ; CHECK-LABEL: name: f
+ ; CHECK: bb.0.entry:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $x0, $w1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32common = COPY $w1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY $x0
+ ; CHECK-NEXT: [[ADDWri:%[0-9]+]]:gpr32common = ADDWri [[COPY]], 1, 0
+ ; CHECK-NEXT: [[ORRWrs:%[0-9]+]]:gpr32 = ORRWrs $wzr, [[ADDWri]], 0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1.A:
+ ; CHECK-NEXT: successors: %bb.2(0x30000000), %bb.1(0x50000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[LDRBBroW:%[0-9]+]]:gpr32 = LDRBBroW [[COPY1]], [[ADDWri]], 0, 0 :: (load (s8) from %ir.sunkaddr)
+ ; CHECK-NEXT: CBNZW killed [[LDRBBroW]], %bb.1
+ ; CHECK-NEXT: B %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2.B:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr32 = COPY $wzr
+ ; CHECK-NEXT: STRWui [[COPY2]], [[COPY1]], 0 :: (store (s32) into %ir.image, align 1)
+ ; CHECK-NEXT: B %bb.1
+ bb.0.entry:
+ successors: %bb.1(0x80000000)
+ liveins: $x0, $w1
+
+ %2:gpr32common = COPY $w1
+ %1:gpr64common = COPY $x0
+ %3:gpr32common = ADDWri %2, 1, 0
+ %4:gpr32 = ORRWrs $wzr, killed %3, 0
+ %0:gpr64 = SUBREG_TO_REG 0, killed %4, %subreg.sub_32
+
+ bb.1.A:
+ successors: %bb.2(0x30000000), %bb.1(0x50000000)
+
+ %5:gpr32 = LDRBBroX %1, %0, 0, 0 :: (load (s8) from %ir.sunkaddr)
+ CBNZW killed %5, %bb.1
+ B %bb.2
+
+ bb.2.B:
+ successors: %bb.1(0x80000000)
+
+ %6:gpr32 = COPY $wzr
+ STRWui %6, %1, 0 :: (store (s32) into %ir.image, align 1)
+ B %bb.1
+
+...
>From 3e487cc634ff4fe7744e10678cf1b4f8731b5115 Mon Sep 17 00:00:00 2001
From: Momchil Velikov <momchil.velikov at arm.com>
Date: Mon, 11 Dec 2023 22:15:40 +0000
Subject: [PATCH 2/2] [fixup] Remove some cruft from a test
---
.../sink-and-fold-clear-kill-flags.mir | 43 -------------------
1 file changed, 43 deletions(-)
diff --git a/llvm/test/CodeGen/AArch64/sink-and-fold-clear-kill-flags.mir b/llvm/test/CodeGen/AArch64/sink-and-fold-clear-kill-flags.mir
index effc346848b75..732af0890d718 100644
--- a/llvm/test/CodeGen/AArch64/sink-and-fold-clear-kill-flags.mir
+++ b/llvm/test/CodeGen/AArch64/sink-and-fold-clear-kill-flags.mir
@@ -28,22 +28,7 @@
---
name: f
alignment: 4
-exposesReturnsTwice: false
-legalized: false
-regBankSelected: false
-selected: false
-failedISel: false
tracksRegLiveness: true
-hasWinCFI: false
-callsEHReturn: false
-callsUnwindInit: false
-hasEHCatchret: false
-hasEHScopes: false
-hasEHFunclets: false
-isOutlined: false
-debugInstrRef: false
-failsVerification: false
-tracksDebugUserValues: false
registers:
- { id: 0, class: gpr64, preferred-register: '' }
- { id: 1, class: gpr64common, preferred-register: '' }
@@ -55,34 +40,6 @@ registers:
liveins:
- { reg: '$x0', virtual-reg: '%1' }
- { reg: '$w1', virtual-reg: '%2' }
-frameInfo:
- isFrameAddressTaken: false
- isReturnAddressTaken: false
- hasStackMap: false
- hasPatchPoint: false
- stackSize: 0
- offsetAdjustment: 0
- maxAlignment: 1
- adjustsStack: false
- hasCalls: false
- stackProtector: ''
- functionContext: ''
- maxCallFrameSize: 0
- cvBytesOfCalleeSavedRegisters: 0
- hasOpaqueSPAdjustment: false
- hasVAStart: false
- hasMustTailInVarArgFunc: false
- hasTailCall: false
- localFrameSize: 0
- savePoint: ''
- restorePoint: ''
-fixedStack: []
-stack: []
-entry_values: []
-callSites: []
-debugValueSubstitutions: []
-constants: []
-machineFunctionInfo: {}
body: |
; CHECK-LABEL: name: f
; CHECK: bb.0.entry:
More information about the llvm-commits
mailing list