[llvm] 401a45c - Fix late rematerialization operands check

Stanislav Mekhanoshin via llvm-commits llvm-commits at lists.llvm.org
Mon Aug 23 12:24:06 PDT 2021


Author: Stanislav Mekhanoshin
Date: 2021-08-23T12:23:58-07:00
New Revision: 401a45c61bac76378735ef94fd507e5fbf763edc

URL: https://github.com/llvm/llvm-project/commit/401a45c61bac76378735ef94fd507e5fbf763edc
DIFF: https://github.com/llvm/llvm-project/commit/401a45c61bac76378735ef94fd507e5fbf763edc.diff

LOG: Fix late rematerialization operands check

D106408 enables rematerialization of instructions with virtual
register uses. That has uncovered a bug in the allUsesAvailableAt
implementation: https://bugs.llvm.org/show_bug.cgi?id=51516.

In the majority of cases canRematerializeAt() is called to check
whether an instruction can be rematerialized before the given
UseIdx. However, SplitEditor::enterIntvAtEnd() calls it to
rematerialize an instruction at the end of a block, passing
LIS.getMBBEndIdx() into the check. In the testcase from the bug it
attempted to rematerialize ADDXri after STRXui in bb.17. The use
operand %55 of the ADD is killed by the STRX, but the check misses
that because it adjusts the passed UseIdx back to the reg slot,
which is before the kill. However, the value is dead at the index
actually passed to the check.

This change uses the later of the passed UseIdx and its reg slot.
This is correct because if we are checking the availability of
operands before an instruction, that instruction cannot be the one
defining those operands. And if we are checking for late
rematerialization, we are really interested in whether the operands
live past the instruction.
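A toy model (not LLVM code) may make the slot arithmetic clearer.
In SlotIndexes, each instruction number carries four sub-slots
ordered Block < EarlyClobber < Register < Dead, and a use is killed
at the Register slot of the killing instruction. The sketch below
assumes the block-end index passed to the check sits past that kill
(here: the Dead slot); the Index/getRegSlot names merely mimic the
real API.

    // Toy model of SlotIndex sub-slot ordering, not LLVM code.
    #include <algorithm>
    #include <cassert>
    #include <utility>

    enum Slot { Block, EarlyClobber, Register, Dead };

    // An index is (instruction number, sub-slot), compared
    // lexicographically.
    using Index = std::pair<unsigned, Slot>;

    static Index getRegSlot(Index I, bool EC) {
      return {I.first, EC ? EarlyClobber : Register};
    }

    int main() {
      Index KillOf55 = {10, Register}; // STRXui at 10 kills %55 here
      Index BlockEnd = {10, Dead};     // assumed index at end of bb.17

      // Old check: rewinding the use index to the early-clobber slot
      // lands before the kill, so %55 still looks live there.
      assert(getRegSlot(BlockEnd, true) < KillOf55);

      // Fixed check: the later of the two stays past the kill, so %55
      // is correctly seen as dead and rematerialization is rejected.
      assert(std::max(BlockEnd, getRegSlot(BlockEnd, true)) > KillOf55);
      return 0;
    }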

The bug is not exploitable without D106408, but the fix is needed
in order to reland the reverted D106408.

Differential Revision: https://reviews.llvm.org/D108475

Added: 
    llvm/test/CodeGen/AArch64/pr51516.mir
    llvm/test/CodeGen/AMDGPU/pr51516.mir

Modified: 
    llvm/lib/CodeGen/LiveRangeEdit.cpp

Removed: 
    


################################################################################
diff --git a/llvm/lib/CodeGen/LiveRangeEdit.cpp b/llvm/lib/CodeGen/LiveRangeEdit.cpp
index 64a2dd2756430..87f6633b73ecb 100644
--- a/llvm/lib/CodeGen/LiveRangeEdit.cpp
+++ b/llvm/lib/CodeGen/LiveRangeEdit.cpp
@@ -107,7 +107,7 @@ bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI,
                                        SlotIndex OrigIdx,
                                        SlotIndex UseIdx) const {
   OrigIdx = OrigIdx.getRegSlot(true);
-  UseIdx = UseIdx.getRegSlot(true);
+  UseIdx = std::max(UseIdx, UseIdx.getRegSlot(true));
   for (unsigned i = 0, e = OrigMI->getNumOperands(); i != e; ++i) {
     const MachineOperand &MO = OrigMI->getOperand(i);
     if (!MO.isReg() || !MO.getReg() || !MO.readsReg())

diff --git a/llvm/test/CodeGen/AArch64/pr51516.mir b/llvm/test/CodeGen/AArch64/pr51516.mir
new file mode 100644
index 0000000000000..910bfb858b50f
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/pr51516.mir
@@ -0,0 +1,208 @@
+# RUN: llc -mtriple=aarch64-unknown-fuchsia -run-pass=greedy -verify-machineinstrs -o - %s | FileCheck %s
+
+# Check that we spill %31 and do not rematerialize it since the use operand
+# of ADDXri is killed by the STRXui in this block.
+
+# CHECK-LABEL: name: test
+# CHECK: bb.17:
+# CHECK:   STRXui
+# CHECK:   LDRXui
+# CHECK: bb.18:
+
+---
+name:            test
+tracksRegLiveness: true
+stack:
+  - { id: 0, type: variable-sized, offset: 0, alignment: 32,
+      stack-id: default }
+body:             |
+  bb.0.entry:
+    successors: %bb.2(0x40000000), %bb.1(0x40000000)
+    liveins: $x0
+
+    %22:gpr64common = COPY $x0
+    CBZW $wzr, %bb.2
+
+  bb.1:
+    successors: %bb.3(0x80000000)
+
+    %1:gpr64 = COPY $xzr
+    B %bb.3
+
+  bb.2:
+    successors: %bb.3(0x80000000)
+
+    $x0 = IMPLICIT_DEF
+    %1:gpr64 = COPY $x0
+
+  bb.3:
+    successors: %bb.4(0x30000000), %bb.5(0x50000000)
+
+    %2:gpr64common = COPY %1
+    CBNZX %1, %bb.5
+    B %bb.4
+
+  bb.4:
+    successors: %bb.5(0x80000000)
+
+    %31:gpr64common = SUBXri $sp, 288, 0
+    $sp = ANDXri %31, 7930
+    %2:gpr64common = COPY $xzr
+
+  bb.5:
+    successors: %bb.6(0x80000000)
+
+    %9:gpr64common = COPY $xzr
+    %35:gpr64sp = ADDXri %2, 32, 0
+    %4:gpr64common = UBFMXri %2, 3, 63
+    %37:gpr64 = MOVi64imm -506381209866536712
+    %38:gpr32 = MOVi32imm -202116109
+    %39:gpr64common = nuw ADDXri %22, 836, 0
+    %41:gpr64common = nuw ADDXri %22, 648, 0
+    STRXui %37, %4, 1
+    STRWui %38, %4, 8
+    %8:gpr64common = UBFMXri %39, 3, 63
+    %7:gpr64common = UBFMXri %41, 3, 63
+    %47:gpr64 = MOVi64imm 0
+
+  bb.6:
+    successors: %bb.8(0x30000000), %bb.7(0x50000000)
+
+    %44:gpr64common = ADDXrr %22, %9
+    %10:gpr64common = ADDXri %44, 648, 0
+    %11:gpr64common = ANDSXri %10, 4098, implicit-def $nzcv
+    Bcc 0, %bb.8, implicit killed $nzcv
+    B %bb.7
+
+  bb.7:
+    successors: %bb.8(0x80000000)
+
+    BL 0, csr_aarch64_aapcs, implicit-def dead $lr
+
+  bb.8:
+    successors: %bb.9(0x04000000), %bb.24(0x7c000000)
+
+    CBNZW $wzr, %bb.24
+    B %bb.9
+
+  bb.9:
+    successors: %bb.10(0x7ffff800), %bb.11(0x00000800)
+
+    %55:gpr64common = ADDXrr %22, %9
+    %56:gpr64sp = ADDXri %55, 648, 0
+    CBZX %11, %bb.10
+    B %bb.11
+
+  bb.10:
+    successors: %bb.11(0x80000000)
+
+    $x0 = ADDXri %55, 648, 0
+    $x2 = IMPLICIT_DEF
+    $w1 = COPY $wzr
+    $x1 = nuw ADDXri %35, 32, 0
+    BL 0, csr_aarch64_aapcs, implicit-def dead $lr
+    %66:gpr64sp = nuw ADDXri %35, 48, 0
+    $x0 = ADDXri %55, 696, 0
+    $x2 = IMPLICIT_DEF
+    $x1 = COPY %66
+
+  bb.11:
+    successors: %bb.15(0x7ffff800), %bb.12(0x00000800)
+
+    CBZX %11, %bb.15
+    B %bb.12
+
+  bb.12:
+    successors: %bb.13(0x00000000), %bb.14(0x80000000)
+
+    CBNZW $wzr, %bb.14
+    B %bb.13
+
+  bb.13:
+    successors:
+
+  bb.14:
+    successors: %bb.18(0x80000000)
+
+    $x1 = COPY %56
+    B %bb.18
+
+  bb.15:
+    successors: %bb.16(0x00000000), %bb.17(0x80000000)
+
+    %76:gpr32 = LDRBBui %7, 0
+    CBZW %76, %bb.17
+    B %bb.16
+
+  bb.16:
+    successors:
+
+    %74:gpr64common = ADDXrr %22, %9
+    %15:gpr64sp = ADDXri %74, 648, 0
+    $x0 = COPY %15
+
+  bb.17:
+    successors: %bb.18(0x80000000)
+
+    STRXui %22, %55, 81
+
+  bb.18:
+    successors: %bb.19(0x80000000), %bb.20(0x00000000)
+
+    %79:gpr32 = LDRBBui %8, 0
+    CBNZW %79, %bb.20
+    B %bb.19
+
+  bb.19:
+    successors: %bb.21(0x80000000), %bb.20(0x00000000)
+
+    %80:gpr32 = MOVi32imm 1
+    CBNZW %80, %bb.21
+    B %bb.20
+
+  bb.20:
+    successors:
+
+    %16:gpr64sp = ADDXri %22, 836, 0
+    $x0 = COPY %16
+
+  bb.21:
+    successors: %bb.24(0x00000000), %bb.22(0x80000000)
+
+    CBZW $wzr, %bb.24
+
+  bb.22:
+    successors: %bb.26(0x80000000)
+
+    B %bb.26
+
+  bb.24:
+    successors: %bb.25(0x04000000), %bb.6(0x7c000000)
+
+    %8:gpr64common = ADDXri %8, 24, 0
+    %7:gpr64common = ADDXri %7, 24, 0
+    CBNZW $wzr, %bb.6
+
+  bb.25:
+    successors: %bb.26(0x80000000)
+
+    %56:gpr64sp = COPY $xzr
+
+  bb.26:
+    successors: %bb.28(0x50000000), %bb.27(0x30000000)
+
+    undef %83.sub_32:gpr64 = MOVi32imm 1172321806
+    STRXui %83, %2, 0
+    CBNZX %1, %bb.28
+    B %bb.27
+
+  bb.27:
+    successors: %bb.28(0x80000000)
+
+    STRXui $xzr, %4, 0
+
+  bb.28:
+    $x0 = COPY %56
+    RET_ReallyLR implicit $x0
+
+...

diff --git a/llvm/test/CodeGen/AMDGPU/pr51516.mir b/llvm/test/CodeGen/AMDGPU/pr51516.mir
new file mode 100644
index 0000000000000..980cc27591670
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/pr51516.mir
@@ -0,0 +1,114 @@
+# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx900 -verify-machineinstrs -start-before=machine-scheduler -stop-after=virtregrewriter,1 -o - %s | FileCheck -check-prefix=GCN %s
+
+# Check that %3 was not rematerialized before the last store since its operand %1
+# is killed by that store.
+
+# GCN-LABEL: name: global_sextload_v32i32_to_v32i64
+# GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+# GCN: GLOBAL_STORE_DWORDX4_SADDR killed renamable $vgpr27, killed renamable $vgpr19_vgpr20_vgpr21_vgpr22, killed renamable $sgpr0_sgpr1, 16, 0, implicit $exec, implicit killed renamable $vgpr0
+
+---
+name:            global_sextload_v32i32_to_v32i64
+tracksRegLiveness: true
+machineFunctionInfo:
+  scratchRSrcReg:  '$sgpr96_sgpr97_sgpr98_sgpr99'
+  stackPtrOffsetReg: '$sgpr32'
+body:             |
+  bb.0:
+    liveins: $sgpr4_sgpr5
+
+    %0:sgpr_64(p4) = COPY $sgpr4_sgpr5
+    %1:vgpr_32 = COPY $m0
+    %2:sgpr_128 = S_LOAD_DWORDX4_IMM %0(p4), 0, 0
+    %3:vgpr_32 = V_MOV_B32_e32 %1, implicit $exec
+    %4:vreg_128 = GLOBAL_LOAD_DWORDX4_SADDR %2.sub2_sub3, %3, 112, 0, implicit $exec :: (load (s128))
+    %5:vreg_128 = GLOBAL_LOAD_DWORDX4_SADDR %2.sub2_sub3, %3, 96, 0, implicit $exec, implicit %1 :: (load (s128))
+    %6:vreg_128 = GLOBAL_LOAD_DWORDX4_SADDR %2.sub2_sub3, %3, 80, 0, implicit $exec :: (load (s128))
+    %7:vreg_128 = GLOBAL_LOAD_DWORDX4_SADDR %2.sub2_sub3, %3, 64, 0, implicit $exec :: (load (s128))
+    %8:vreg_128 = GLOBAL_LOAD_DWORDX4_SADDR %2.sub2_sub3, %3, 48, 0, implicit $exec :: (load (s128))
+    %9:vreg_128 = GLOBAL_LOAD_DWORDX4_SADDR %2.sub2_sub3, %3, 32, 0, implicit $exec :: (load (s128))
+    %10:vreg_128 = GLOBAL_LOAD_DWORDX4_SADDR %2.sub2_sub3, %3, 16, 0, implicit $exec :: (load (s128))
+    %11:vreg_128 = GLOBAL_LOAD_DWORDX4_SADDR %2.sub2_sub3, %3, 0, 0, implicit $exec :: (load (s128))
+    undef %12.sub3:vreg_128 = V_ASHRREV_I32_e32 31, %11.sub3, implicit $exec
+    %12.sub1:vreg_128 = V_ASHRREV_I32_e32 31, %11.sub2, implicit $exec
+    undef %13.sub3:vreg_128 = V_ASHRREV_I32_e32 31, %11.sub1, implicit $exec
+    %13.sub1:vreg_128 = V_ASHRREV_I32_e32 31, %11.sub0, implicit $exec
+    undef %14.sub3:vreg_128 = V_ASHRREV_I32_e32 31, %10.sub3, implicit $exec
+    %14.sub1:vreg_128 = V_ASHRREV_I32_e32 31, %10.sub2, implicit $exec
+    undef %15.sub3:vreg_128 = V_ASHRREV_I32_e32 31, %10.sub1, implicit $exec
+    %15.sub1:vreg_128 = V_ASHRREV_I32_e32 31, %10.sub0, implicit $exec
+    undef %16.sub3:vreg_128 = V_ASHRREV_I32_e32 31, %9.sub3, implicit $exec
+    %16.sub1:vreg_128 = V_ASHRREV_I32_e32 31, %9.sub2, implicit $exec
+    undef %17.sub3:vreg_128 = V_ASHRREV_I32_e32 31, %9.sub1, implicit $exec
+    %17.sub1:vreg_128 = V_ASHRREV_I32_e32 31, %9.sub0, implicit $exec
+    undef %18.sub3:vreg_128 = V_ASHRREV_I32_e32 31, %8.sub3, implicit $exec
+    %18.sub1:vreg_128 = V_ASHRREV_I32_e32 31, %8.sub2, implicit $exec
+    undef %19.sub3:vreg_128 = V_ASHRREV_I32_e32 31, %8.sub1, implicit $exec
+    %19.sub1:vreg_128 = V_ASHRREV_I32_e32 31, %8.sub0, implicit $exec
+    undef %20.sub3:vreg_128 = V_ASHRREV_I32_e32 31, %7.sub3, implicit $exec
+    %20.sub1:vreg_128 = V_ASHRREV_I32_e32 31, %7.sub2, implicit $exec
+    undef %21.sub3:vreg_128 = V_ASHRREV_I32_e32 31, %7.sub1, implicit $exec
+    %21.sub1:vreg_128 = V_ASHRREV_I32_e32 31, %7.sub0, implicit $exec
+    undef %22.sub3:vreg_128 = V_ASHRREV_I32_e32 31, %6.sub3, implicit $exec
+    %22.sub1:vreg_128 = V_ASHRREV_I32_e32 31, %6.sub2, implicit $exec
+    undef %23.sub3:vreg_128 = V_ASHRREV_I32_e32 31, %6.sub1, implicit $exec
+    %23.sub1:vreg_128 = V_ASHRREV_I32_e32 31, %6.sub0, implicit $exec
+    undef %24.sub3:vreg_128 = V_ASHRREV_I32_e32 31, %5.sub3, implicit $exec
+    %24.sub1:vreg_128 = V_ASHRREV_I32_e32 31, %5.sub2, implicit $exec
+    undef %25.sub3:vreg_128 = V_ASHRREV_I32_e32 31, %5.sub1, implicit $exec
+    %25.sub1:vreg_128 = V_ASHRREV_I32_e32 31, %5.sub0, implicit $exec
+    undef %26.sub3:vreg_128 = V_ASHRREV_I32_e32 31, %4.sub3, implicit $exec
+    %26.sub1:vreg_128 = V_ASHRREV_I32_e32 31, %4.sub2, implicit $exec
+    undef %27.sub3:vreg_128 = V_ASHRREV_I32_e32 31, %4.sub1, implicit $exec
+    %27.sub1:vreg_128 = V_ASHRREV_I32_e32 31, %4.sub0, implicit $exec
+    %27.sub0:vreg_128 = COPY %4.sub0
+    %27.sub2:vreg_128 = COPY %4.sub1
+    GLOBAL_STORE_DWORDX4_SADDR %3, %27, %2.sub0_sub1, 224, 0, implicit $exec
+    %26.sub0:vreg_128 = COPY %4.sub2
+    %26.sub2:vreg_128 = COPY %4.sub3
+    GLOBAL_STORE_DWORDX4_SADDR %3, %26, %2.sub0_sub1, 240, 0, implicit $exec
+    %25.sub0:vreg_128 = COPY %5.sub0
+    %25.sub2:vreg_128 = COPY %5.sub1
+    GLOBAL_STORE_DWORDX4_SADDR %3, %25, %2.sub0_sub1, 192, 0, implicit $exec
+    %24.sub0:vreg_128 = COPY %5.sub2
+    %24.sub2:vreg_128 = COPY %5.sub3
+    GLOBAL_STORE_DWORDX4_SADDR %3, %24, %2.sub0_sub1, 208, 0, implicit $exec
+    %23.sub0:vreg_128 = COPY %6.sub0
+    %23.sub2:vreg_128 = COPY %6.sub1
+    GLOBAL_STORE_DWORDX4_SADDR %3, %23, %2.sub0_sub1, 160, 0, implicit $exec
+    %22.sub0:vreg_128 = COPY %6.sub2
+    %22.sub2:vreg_128 = COPY %6.sub3
+    GLOBAL_STORE_DWORDX4_SADDR %3, %22, %2.sub0_sub1, 176, 0, implicit $exec
+    %21.sub0:vreg_128 = COPY %7.sub0
+    %21.sub2:vreg_128 = COPY %7.sub1
+    GLOBAL_STORE_DWORDX4_SADDR %3, %21, %2.sub0_sub1, 128, 0, implicit $exec
+    %20.sub0:vreg_128 = COPY %7.sub2
+    %20.sub2:vreg_128 = COPY %7.sub3
+    GLOBAL_STORE_DWORDX4_SADDR %3, %20, %2.sub0_sub1, 144, 0, implicit $exec
+    %19.sub0:vreg_128 = COPY %8.sub0
+    %19.sub2:vreg_128 = COPY %8.sub1
+    GLOBAL_STORE_DWORDX4_SADDR %3, %19, %2.sub0_sub1, 96, 0, implicit $exec
+    %18.sub0:vreg_128 = COPY %8.sub2
+    %18.sub2:vreg_128 = COPY %8.sub3
+    GLOBAL_STORE_DWORDX4_SADDR %3, %18, %2.sub0_sub1, 112, 0, implicit $exec
+    %17.sub0:vreg_128 = COPY %9.sub0
+    %17.sub2:vreg_128 = COPY %9.sub1
+    GLOBAL_STORE_DWORDX4_SADDR %3, %17, %2.sub0_sub1, 64, 0, implicit $exec
+    %16.sub0:vreg_128 = COPY %9.sub2
+    %16.sub2:vreg_128 = COPY %9.sub3
+    GLOBAL_STORE_DWORDX4_SADDR %3, %16, %2.sub0_sub1, 80, 0, implicit $exec
+    %15.sub0:vreg_128 = COPY %10.sub0
+    %15.sub2:vreg_128 = COPY %10.sub1
+    GLOBAL_STORE_DWORDX4_SADDR %3, %15, %2.sub0_sub1, 32, 0, implicit $exec
+    %14.sub0:vreg_128 = COPY %10.sub2
+    %14.sub2:vreg_128 = COPY %10.sub3
+    GLOBAL_STORE_DWORDX4_SADDR %3, %14, %2.sub0_sub1, 48, 0, implicit $exec
+    %13.sub0:vreg_128 = COPY %11.sub0
+    %13.sub2:vreg_128 = COPY %11.sub1
+    GLOBAL_STORE_DWORDX4_SADDR %3, %13, %2.sub0_sub1, 0, 0, implicit $exec
+    %12.sub0:vreg_128 = COPY %11.sub2
+    %12.sub2:vreg_128 = COPY %11.sub3
+    GLOBAL_STORE_DWORDX4_SADDR %3, %12, %2.sub0_sub1, 16, 0, implicit $exec, implicit killed %1
+    S_ENDPGM 0
+
+...
