[llvm] LiveRangeEdit: Clear all dead flags when rematerializing (PR #73933)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 30 05:14:30 PST 2023
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-regalloc
Author: Matt Arsenault (arsenm)
<details>
<summary>Changes</summary>
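Rematerialization is allowed for instructions that carry implicit-defs of the same register as their single explicit def. When such an instruction was rematerialized, only the dead flag on the explicit def (operand 0) was cleared, so any implicit-def of the same register could remain marked dead. This change clears the dead flag on every def of the destination register instead.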
---
Full diff: https://github.com/llvm/llvm-project/pull/73933.diff
3 Files Affected:
- (modified) llvm/lib/CodeGen/LiveRangeEdit.cpp (+1-1)
- (added) llvm/test/CodeGen/AArch64/clear-dead-implicit-def-impdef.ll (+125)
- (added) llvm/test/CodeGen/AArch64/clear-dead-implicit-def-impdef.mir (+119)
``````````diff
diff --git a/llvm/lib/CodeGen/LiveRangeEdit.cpp b/llvm/lib/CodeGen/LiveRangeEdit.cpp
index ff49e080090c2bd..0203034b5a01474 100644
--- a/llvm/lib/CodeGen/LiveRangeEdit.cpp
+++ b/llvm/lib/CodeGen/LiveRangeEdit.cpp
@@ -190,7 +190,7 @@ SlotIndex LiveRangeEdit::rematerializeAt(MachineBasicBlock &MBB,
// DestReg of the cloned instruction cannot be Dead. Set isDead of DestReg
// to false anyway in case the isDead flag of RM.OrigMI's dest register
// is true.
- (*--MI).getOperand(0).setIsDead(false);
+ (*--MI).clearRegisterDeads(DestReg);
Rematted.insert(RM.ParentVNI);
++NumReMaterialization;
diff --git a/llvm/test/CodeGen/AArch64/clear-dead-implicit-def-impdef.ll b/llvm/test/CodeGen/AArch64/clear-dead-implicit-def-impdef.ll
new file mode 100644
index 000000000000000..bc26eca6f27ef8e
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/clear-dead-implicit-def-impdef.ll
@@ -0,0 +1,125 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=arm64-apple-macosx10.13.0 -mcpu=apple-m1 < %s | FileCheck %s
+
+@.str = external constant [9 x i8]
+
+define void @_ZN38SanitizerCommonInterceptors_Scanf_Test8TestBodyEv(ptr %.str.40, ptr %.str.41, ptr %.str.42, ptr %.str.43, ptr %.str.44, ptr %.str.45, ptr nocapture writeonly %.str.47) nounwind {
+; CHECK-LABEL: _ZN38SanitizerCommonInterceptors_Scanf_Test8TestBodyEv:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: sub sp, sp, #128
+; CHECK-NEXT: stp x28, x27, [sp, #32] ; 16-byte Folded Spill
+; CHECK-NEXT: stp x26, x25, [sp, #48] ; 16-byte Folded Spill
+; CHECK-NEXT: stp x24, x23, [sp, #64] ; 16-byte Folded Spill
+; CHECK-NEXT: stp x22, x21, [sp, #80] ; 16-byte Folded Spill
+; CHECK-NEXT: stp x20, x19, [sp, #96] ; 16-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #112] ; 16-byte Folded Spill
+; CHECK-NEXT: mov x24, x6
+; CHECK-NEXT: mov x19, x5
+; CHECK-NEXT: mov x20, x4
+; CHECK-NEXT: mov x21, x3
+; CHECK-NEXT: mov x22, x2
+; CHECK-NEXT: mov x23, x1
+; CHECK-NEXT: mov x25, x0
+; CHECK-NEXT: str xzr, [sp]
+; CHECK-NEXT: mov x0, #0 ; =0x0
+; CHECK-NEXT: mov w1, #1 ; =0x1
+; CHECK-NEXT: bl __ZL9testScanfPKcjz
+; CHECK-NEXT: mov w28, #4 ; =0x4
+; CHECK-NEXT: stp x28, x28, [sp, #8]
+; CHECK-NEXT: str x28, [sp]
+; CHECK-NEXT: mov x0, #0 ; =0x0
+; CHECK-NEXT: mov w1, #0 ; =0x0
+; CHECK-NEXT: bl __ZL9testScanfPKcjz
+; CHECK-NEXT: stp x28, xzr, [sp]
+; CHECK-NEXT: mov x0, #0 ; =0x0
+; CHECK-NEXT: mov w1, #0 ; =0x0
+; CHECK-NEXT: bl __ZL9testScanfPKcjz
+; CHECK-NEXT: mov w27, #8 ; =0x8
+; CHECK-NEXT: str x27, [sp]
+; CHECK-NEXT: mov x0, #0 ; =0x0
+; CHECK-NEXT: mov w1, #0 ; =0x0
+; CHECK-NEXT: bl __ZL9testScanfPKcjz
+; CHECK-NEXT: mov w26, #1 ; =0x1
+; CHECK-NEXT: stp xzr, x26, [sp]
+; CHECK-NEXT: mov x0, #0 ; =0x0
+; CHECK-NEXT: mov w1, #0 ; =0x0
+; CHECK-NEXT: bl __ZL9testScanfPKcjz
+; CHECK-NEXT: str x26, [sp]
+; CHECK-NEXT: mov x0, #0 ; =0x0
+; CHECK-NEXT: mov w1, #0 ; =0x0
+; CHECK-NEXT: bl __ZL9testScanfPKcjz
+; CHECK-NEXT: str x28, [sp]
+; CHECK-NEXT: Lloh0:
+; CHECK-NEXT: adrp x26, _.str@GOTPAGE
+; CHECK-NEXT: Lloh1:
+; CHECK-NEXT: ldr x26, [x26, _.str@GOTPAGEOFF]
+; CHECK-NEXT: mov x0, x26
+; CHECK-NEXT: mov w1, #0 ; =0x0
+; CHECK-NEXT: bl __ZL9testScanfPKcjz
+; CHECK-NEXT: str wzr, [x24]
+; CHECK-NEXT: str x27, [sp]
+; CHECK-NEXT: mov x0, x25
+; CHECK-NEXT: mov w1, #0 ; =0x0
+; CHECK-NEXT: bl __ZL20testScanfNoGnuMallocPKcjz
+; CHECK-NEXT: str x28, [sp]
+; CHECK-NEXT: mov x0, x23
+; CHECK-NEXT: mov w1, #0 ; =0x0
+; CHECK-NEXT: bl __ZL20testScanfNoGnuMallocPKcjz
+; CHECK-NEXT: str x28, [sp]
+; CHECK-NEXT: mov x0, x22
+; CHECK-NEXT: mov w1, #0 ; =0x0
+; CHECK-NEXT: bl __ZL20testScanfNoGnuMallocPKcjz
+; CHECK-NEXT: str x28, [sp]
+; CHECK-NEXT: mov x0, x21
+; CHECK-NEXT: mov w1, #0 ; =0x0
+; CHECK-NEXT: bl __ZL20testScanfNoGnuMallocPKcjz
+; CHECK-NEXT: str x28, [sp]
+; CHECK-NEXT: mov x0, x20
+; CHECK-NEXT: mov w1, #0 ; =0x0
+; CHECK-NEXT: bl __ZL20testScanfNoGnuMallocPKcjz
+; CHECK-NEXT: str xzr, [sp]
+; CHECK-NEXT: mov x0, x19
+; CHECK-NEXT: mov w1, #0 ; =0x0
+; CHECK-NEXT: bl __ZL20testScanfNoGnuMallocPKcjz
+; CHECK-NEXT: str xzr, [sp]
+; CHECK-NEXT: mov x0, x26
+; CHECK-NEXT: mov w1, #0 ; =0x0
+; CHECK-NEXT: bl __ZL20testScanfNoGnuMallocPKcjz
+; CHECK-NEXT: mov w8, #1 ; =0x1
+; CHECK-NEXT: stp x8, xzr, [sp, #8]
+; CHECK-NEXT: str xzr, [sp]
+; CHECK-NEXT: mov x0, #0 ; =0x0
+; CHECK-NEXT: mov w1, #0 ; =0x0
+; CHECK-NEXT: bl __ZL20testScanfNoGnuMallocPKcjz
+; CHECK-NEXT: ldp x29, x30, [sp, #112] ; 16-byte Folded Reload
+; CHECK-NEXT: ldp x20, x19, [sp, #96] ; 16-byte Folded Reload
+; CHECK-NEXT: ldp x22, x21, [sp, #80] ; 16-byte Folded Reload
+; CHECK-NEXT: ldp x24, x23, [sp, #64] ; 16-byte Folded Reload
+; CHECK-NEXT: ldp x26, x25, [sp, #48] ; 16-byte Folded Reload
+; CHECK-NEXT: ldp x28, x27, [sp, #32] ; 16-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #128
+; CHECK-NEXT: ret
+; CHECK-NEXT: .loh AdrpLdrGot Lloh0, Lloh1
+entry:
+ tail call void (ptr, i32, ...) @_ZL9testScanfPKcjz(ptr null, i32 1, i32 0)
+ tail call void (ptr, i32, ...) @_ZL9testScanfPKcjz(ptr null, i32 0, i32 4, i32 4, i32 4)
+ tail call void (ptr, i32, ...) @_ZL9testScanfPKcjz(ptr null, i32 0, i32 4, i32 0)
+ tail call void (ptr, i32, ...) @_ZL9testScanfPKcjz(ptr null, i32 0, i32 8)
+ tail call void (ptr, i32, ...) @_ZL9testScanfPKcjz(ptr null, i32 0, i32 0, i32 1)
+ tail call void (ptr, i32, ...) @_ZL9testScanfPKcjz(ptr null, i32 0, i32 1)
+ tail call void (ptr, i32, ...) @_ZL9testScanfPKcjz(ptr nonnull @.str, i32 0, i32 4)
+ store i32 0, ptr %.str.47, align 4
+ tail call void (ptr, i32, ...) @_ZL20testScanfNoGnuMallocPKcjz(ptr %.str.40, i32 0, i32 8)
+ tail call void (ptr, i32, ...) @_ZL20testScanfNoGnuMallocPKcjz(ptr %.str.41, i32 0, i32 4)
+ tail call void (ptr, i32, ...) @_ZL20testScanfNoGnuMallocPKcjz(ptr %.str.42, i32 0, i32 4)
+ tail call void (ptr, i32, ...) @_ZL20testScanfNoGnuMallocPKcjz(ptr %.str.43, i32 0, i32 4)
+ tail call void (ptr, i32, ...) @_ZL20testScanfNoGnuMallocPKcjz(ptr %.str.44, i32 0, i32 4)
+ tail call void (ptr, i32, ...) @_ZL20testScanfNoGnuMallocPKcjz(ptr %.str.45, i32 0, i32 0)
+ tail call void (ptr, i32, ...) @_ZL20testScanfNoGnuMallocPKcjz(ptr nonnull @.str, i32 0, i32 0)
+ tail call void (ptr, i32, ...) @_ZL20testScanfNoGnuMallocPKcjz(ptr null, i32 0, i32 0, i32 1, i32 0)
+ ret void
+}
+
+declare void @_ZL9testScanfPKcjz(ptr, i32, ...) local_unnamed_addr
+
+declare void @_ZL20testScanfNoGnuMallocPKcjz(ptr, i32, ...) local_unnamed_addr
diff --git a/llvm/test/CodeGen/AArch64/clear-dead-implicit-def-impdef.mir b/llvm/test/CodeGen/AArch64/clear-dead-implicit-def-impdef.mir
new file mode 100644
index 000000000000000..9040937d027df48
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/clear-dead-implicit-def-impdef.mir
@@ -0,0 +1,119 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
+# RUN: llc -mtriple=arm64-apple-macosx -mcpu=apple-m1 -verify-regalloc -run-pass=greedy -o - %s | FileCheck %s
+---
+name: func
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0, $x1, $x2, $x3, $x4, $x5, $x6
+
+ ; CHECK-LABEL: name: func
+ ; CHECK: liveins: $x0, $x1, $x2, $x3, $x4, $x5, $x6
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:gpr64sp = IMPLICIT_DEF
+ ; CHECK-NEXT: dead [[DEF1:%[0-9]+]]:gpr32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY $x2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64 = COPY $x3
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr64 = COPY $x4
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:gpr64 = COPY $x5
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:gpr64 = COPY $x6
+ ; CHECK-NEXT: undef [[MOVi32imm:%[0-9]+]].sub_32:gpr64 = MOVi32imm 4, implicit-def [[MOVi32imm]]
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 8, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: BL 0, csr_darwin_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp
+ ; CHECK-NEXT: ADJCALLSTACKUP 8, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 24, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: ADJCALLSTACKUP 24, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 16, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: ADJCALLSTACKUP 16, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 8, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:gpr64 = IMPLICIT_DEF
+ ; CHECK-NEXT: ADJCALLSTACKUP 8, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 16, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: undef [[DEF3:%[0-9]+]].sub_32:gpr64 = IMPLICIT_DEF implicit-def [[DEF3]]
+ ; CHECK-NEXT: STRXui [[DEF3]], [[DEF]], 1 :: (store (s64) into stack + 8)
+ ; CHECK-NEXT: BL 0, csr_darwin_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp
+ ; CHECK-NEXT: ADJCALLSTACKUP 16, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 8, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: STRXui [[MOVi32imm]], [[DEF]], 0 :: (store (s64) into stack)
+ ; CHECK-NEXT: ADJCALLSTACKUP 8, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: STRWui undef [[MOVi32imm]].sub_32, [[COPY]], 0 :: (store (s32))
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 8, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: STRXui [[DEF2]], undef [[DEF]], 0 :: (store (s64) into stack)
+ ; CHECK-NEXT: $x0 = COPY [[COPY6]]
+ ; CHECK-NEXT: ADJCALLSTACKUP 8, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 8, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: $x0 = COPY [[COPY5]]
+ ; CHECK-NEXT: ADJCALLSTACKUP 8, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 8, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: $x0 = COPY [[COPY4]]
+ ; CHECK-NEXT: ADJCALLSTACKUP 8, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 8, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: $x0 = COPY [[COPY3]]
+ ; CHECK-NEXT: ADJCALLSTACKUP 8, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 8, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: $x0 = COPY [[COPY2]]
+ ; CHECK-NEXT: ADJCALLSTACKUP 8, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 8, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: $x0 = COPY [[COPY1]]
+ ; CHECK-NEXT: ADJCALLSTACKUP 8, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 24, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: undef [[DEF4:%[0-9]+]].sub_32:gpr64 = IMPLICIT_DEF implicit-def [[DEF4]]
+ ; CHECK-NEXT: STRXui [[DEF4]], undef [[DEF]], 1 :: (store (s64) into stack + 8)
+ ; CHECK-NEXT: ADJCALLSTACKUP 24, 0, implicit-def dead $sp, implicit $sp
+ ; CHECK-NEXT: RET_ReallyLR
+ %0:gpr64sp = IMPLICIT_DEF
+ undef %13.sub_32:gpr64 = IMPLICIT_DEF implicit-def %13
+ dead %2:gpr32 = IMPLICIT_DEF
+ %3:gpr64common = COPY $x0
+ %4:gpr64 = COPY $x1
+ %5:gpr64 = COPY $x2
+ %6:gpr64 = COPY $x3
+ %7:gpr64 = COPY $x4
+ %8:gpr64 = COPY $x5
+ %9:gpr64 = COPY $x6
+ undef %11.sub_32:gpr64 = MOVi32imm 4, implicit-def %11
+ ADJCALLSTACKDOWN 8, 0, implicit-def dead $sp, implicit $sp
+ BL 0, csr_darwin_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp
+ ADJCALLSTACKUP 8, 0, implicit-def dead $sp, implicit $sp
+ ADJCALLSTACKDOWN 24, 0, implicit-def dead $sp, implicit $sp
+ ADJCALLSTACKUP 24, 0, implicit-def dead $sp, implicit $sp
+ ADJCALLSTACKDOWN 16, 0, implicit-def dead $sp, implicit $sp
+ ADJCALLSTACKUP 16, 0, implicit-def dead $sp, implicit $sp
+ ADJCALLSTACKDOWN 8, 0, implicit-def dead $sp, implicit $sp
+ %12:gpr64 = IMPLICIT_DEF
+ ADJCALLSTACKUP 8, 0, implicit-def dead $sp, implicit $sp
+ ADJCALLSTACKDOWN 16, 0, implicit-def dead $sp, implicit $sp
+ STRXui %13, %0, 1 :: (store (s64) into stack + 8)
+ BL 0, csr_darwin_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp
+ ADJCALLSTACKUP 16, 0, implicit-def dead $sp, implicit $sp
+ ADJCALLSTACKDOWN 8, 0, implicit-def dead $sp, implicit $sp
+ STRXui %11, %0, 0 :: (store (s64) into stack)
+ ADJCALLSTACKUP 8, 0, implicit-def dead $sp, implicit $sp
+ STRWui undef %11.sub_32, %3, 0 :: (store (s32))
+ ADJCALLSTACKDOWN 8, 0, implicit-def dead $sp, implicit $sp
+ STRXui %12, undef %0, 0 :: (store (s64) into stack)
+ $x0 = COPY %9
+ ADJCALLSTACKUP 8, 0, implicit-def dead $sp, implicit $sp
+ ADJCALLSTACKDOWN 8, 0, implicit-def dead $sp, implicit $sp
+ $x0 = COPY %8
+ ADJCALLSTACKUP 8, 0, implicit-def dead $sp, implicit $sp
+ ADJCALLSTACKDOWN 8, 0, implicit-def dead $sp, implicit $sp
+ $x0 = COPY %7
+ ADJCALLSTACKUP 8, 0, implicit-def dead $sp, implicit $sp
+ ADJCALLSTACKDOWN 8, 0, implicit-def dead $sp, implicit $sp
+ $x0 = COPY %6
+ ADJCALLSTACKUP 8, 0, implicit-def dead $sp, implicit $sp
+ ADJCALLSTACKDOWN 8, 0, implicit-def dead $sp, implicit $sp
+ $x0 = COPY %5
+ ADJCALLSTACKUP 8, 0, implicit-def dead $sp, implicit $sp
+ ADJCALLSTACKDOWN 8, 0, implicit-def dead $sp, implicit $sp
+ $x0 = COPY %4
+ ADJCALLSTACKUP 8, 0, implicit-def dead $sp, implicit $sp
+ ADJCALLSTACKDOWN 24, 0, implicit-def dead $sp, implicit $sp
+ STRXui %13, undef %0, 1 :: (store (s64) into stack + 8)
+ ADJCALLSTACKUP 24, 0, implicit-def dead $sp, implicit $sp
+ RET_ReallyLR
+
+...
``````````
</details>
https://github.com/llvm/llvm-project/pull/73933
More information about the llvm-commits
mailing list