[llvm] [AMDGPU] Shrink S_MOV_B64 to S_MOV_B32 during rematerialization (PR #184333)
Romanov Vlad via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 4 11:04:30 PST 2026
https://github.com/romanovvlad updated https://github.com/llvm/llvm-project/pull/184333
>From feda0c6cb37fd1927b6023debe8148f727a7ee18 Mon Sep 17 00:00:00 2001
From: Vlad <Vladislav.Romanov at amd.com>
Date: Wed, 25 Feb 2026 08:11:46 -0600
Subject: [PATCH 1/2] [AMDGPU] Shrink S_MOV_B64 to S_MOV_B32 during
rematerialization
When rematerializing S_MOV_B64 or S_MOV_B64_IMM_PSEUDO and only a
single 32-bit lane of the result is used at the remat point, emit
S_MOV_B32 with the appropriate half of the 64-bit immediate instead.
This reduces register pressure by defining a 32-bit register instead
of a 64-bit pair when the other half is unused.
---
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 72 +++++++++++++++++++
.../AMDGPU/regalloc-sgpr128-partial-def.mir | 65 +++++++++++++++++
2 files changed, 137 insertions(+)
create mode 100644 llvm/test/CodeGen/AMDGPU/regalloc-sgpr128-partial-def.mir
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 91d85990ce16c..c8003e4bae9a2 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2617,6 +2617,78 @@ void SIInstrInfo::reMaterialize(MachineBasicBlock &MBB,
// TODO: Handle more cases.
unsigned Opcode = Orig.getOpcode();
switch (Opcode) {
+ case AMDGPU::S_MOV_B64:
+ case AMDGPU::S_MOV_B64_IMM_PSEUDO: {
+ if (SubIdx != 0)
+ break;
+
+ if (I == MBB.end())
+ break;
+
+ if (I->isBundled())
+ break;
+
+ if (!Orig.getOperand(1).isImm())
+ break;
+
+ // Shrink S_MOV_B64 to S_MOV_B32 when the use at the insertion point
+ // only needs a single 32-bit subreg of the defined value.
+
+ // Scan all uses of the original register from the insertion point
+ // and verify that all uses in the same live range read the same lane.
+ // Stop at a def of RegToFind since that starts a new live
+ // range whose uses won't be rewritten to our DestReg.
+ Register RegToFind = Orig.getOperand(0).getReg();
+ unsigned UseSubReg = AMDGPU::NoSubRegister;
+
+ [&]() {
+ for (auto It = I; It != MBB.end(); ++It) {
+ for (auto &MO : It->operands()) {
+ // Skip irrelevant registers
+ if (!MO.isReg() || MO.getReg() != RegToFind)
+ continue;
+
+ // Stop at a new live range
+ if (MO.isDef())
+ return;
+
+ if (UseSubReg == AMDGPU::NoSubRegister) {
+ UseSubReg = MO.getSubReg();
+ continue;
+ }
+
+ // Bail out if subregs do not match between uses
+ if (MO.getSubReg() != UseSubReg) {
+ UseSubReg = AMDGPU::NoSubRegister;
+ return;
+ }
+ }
+ }
+ }();
+
+ if (UseSubReg == AMDGPU::NoSubRegister)
+ break;
+
+ if (RI.getSubRegIdxSize(UseSubReg) != 32)
+ break;
+
+ // Determine which half of the 64-bit immediate corresponds to the use.
+ unsigned UseOffset = RI.getSubRegIdxOffset(UseSubReg);
+ unsigned OrigSubReg = Orig.getOperand(0).getSubReg();
+ unsigned DefOffset =
+ (OrigSubReg == AMDGPU::NoSubRegister)
+ ? 0
+ : RI.getSubRegIdxOffset(Orig.getOperand(0).getSubReg());
+ int64_t Imm64 = Orig.getOperand(1).getImm();
+ int32_t Imm32 = (UseOffset == DefOffset) ? Lo_32(Imm64) : Hi_32(Imm64);
+
+ // Emit S_MOV_B32 defining just the needed 32-bit subreg of DestReg.
+ BuildMI(MBB, I, Orig.getDebugLoc(), get(AMDGPU::S_MOV_B32))
+ .addReg(DestReg, RegState::Define | RegState::Undef, UseSubReg)
+ .addImm(Imm32);
+ return;
+ }
+
case AMDGPU::S_LOAD_DWORDX16_IMM:
case AMDGPU::S_LOAD_DWORDX8_IMM: {
if (SubIdx != 0)
diff --git a/llvm/test/CodeGen/AMDGPU/regalloc-sgpr128-partial-def.mir b/llvm/test/CodeGen/AMDGPU/regalloc-sgpr128-partial-def.mir
new file mode 100644
index 0000000000000..6409b3d864c3f
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/regalloc-sgpr128-partial-def.mir
@@ -0,0 +1,65 @@
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs --stress-regalloc=2 -start-before=greedy -stop-after=virtregrewriter \
+# RUN: %s -o - | FileCheck %s
+
+# Test that S_MOV_B64 rematerialization is shrunk to S_MOV_B32 when only a
+# single 32-bit subreg of the defined value is used at the remat point.
+
+# CHECK-LABEL: name: remat_shrink_s_mov_b64
+---
+name: remat_shrink_s_mov_b64
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $sgpr8_sgpr9
+ %0:sreg_64_xexec = COPY $sgpr8_sgpr9
+
+ ; 0x0000002A_00000539: Lo_32 = 1337, Hi_32 = 42
+ undef %1.sub0_sub1:sgpr_128 = S_MOV_B64 180388627769
+ ; 0x00000007_00000065: Lo_32 = 101, Hi_32 = 7
+ undef %2.sub0_sub1:sgpr_128 = S_MOV_B64 30064771173
+ %2.sub2_sub3:sgpr_128 = S_MOV_B64 2
+ undef %3.sub2_sub3:sgpr_128 = S_MOV_B64 30064771173
+ undef %4.sub2_sub3:sgpr_128 = S_MOV_B64 180388627769
+ undef %5.sub0_sub1:sgpr_128 = S_MOV_B64 180388627769
+ %6:sreg_64 = S_MOV_B64_IMM_PSEUDO 65536
+ S_NOP 0, implicit %1.sub0
+ S_NOP 0, implicit %1.sub1
+
+ ; CHECK: S_NOP 0, implicit-def $sgpr0_sgpr1
+ S_NOP 0, implicit-def $sgpr0_sgpr1, implicit-def $sgpr2_sgpr3, implicit-def $sgpr4_sgpr5, implicit-def $sgpr6_sgpr7
+
+ ; %2: uses both sub0 and sub1 - no shrink expected.
+ S_NOP 0, implicit %2.sub0
+ S_NOP 0, implicit %2.sub1
+
+ ; %1: both lanes used after barrier, rematerialized as S_MOV_B64, not shrunk.
+ ; CHECK: renamable $[[REG1LO:sgpr[0-9]+]]_[[REG1HI:sgpr[0-9]+]] = S_MOV_B64 180388627769
+ ; CHECK-NEXT: S_NOP 0, implicit renamable $[[REG1HI]]
+ ; CHECK-NEXT: S_NOP 0, implicit killed renamable $[[REG1LO]]
+ S_NOP 0, implicit %1.sub1
+ S_NOP 0, implicit %1.sub0
+
+ ; %3: only sub3 used - shrink to S_MOV_B32 with Hi_32(0x0000000700000065) = 7.
+ ; CHECK: renamable $[[REG3:sgpr[0-9]+]] = S_MOV_B32 7
+ ; CHECK-NEXT: S_NOP 0, implicit renamable $[[REG3]]
+ ; CHECK-NEXT: S_NOP 0, implicit killed renamable $[[REG3]]
+ S_NOP 0, implicit %3.sub3
+ S_NOP 0, implicit %3.sub3
+
+ ; %4: only sub2 used - shrink to S_MOV_B32 with Lo_32(0x0000002A00000539) = 1337.
+ ; CHECK: renamable $[[REG4:sgpr[0-9]+]] = S_MOV_B32 1337
+ ; CHECK-NEXT: S_NOP 0, implicit killed renamable $[[REG4]]
+ S_NOP 0, implicit %4.sub2
+
+ ; %5: sub0_sub1 def, only sub1 used - shrink to S_MOV_B32 with Hi_32(0x0000002A00000539) = 42.
+ ; CHECK: renamable $[[REG5:sgpr[0-9]+]] = S_MOV_B32 42
+ ; CHECK-NEXT: S_NOP 0, implicit killed renamable $[[REG5]]
+ S_NOP 0, implicit %5.sub1
+
+ ; CHECK: renamable $[[REG6:sgpr[0-9]+]] = S_MOV_B32 65536
+ ; CHECK-NEXT: S_NOP 0, implicit killed renamable $[[REG6]]
+ S_NOP 0, implicit %6.sub0
+
+ ; CHECK: S_ENDPGM 0
+ S_ENDPGM 0, implicit %0, implicit %1.sub0
+...
>From 6aa22c8aa116a24ff467cdf11ba8549006a46129 Mon Sep 17 00:00:00 2001
From: Vlad <Vladislav.Romanov at amd.com>
Date: Wed, 4 Mar 2026 13:04:00 -0600
Subject: [PATCH 2/2] Rework the approach: propagate a LaneBitmask of live lanes instead of scanning uses
---
llvm/include/llvm/CodeGen/LiveRangeEdit.h | 5 +-
llvm/include/llvm/CodeGen/TargetInstrInfo.h | 5 +-
llvm/lib/CodeGen/InlineSpiller.cpp | 13 +++-
llvm/lib/CodeGen/LiveRangeEdit.cpp | 5 +-
llvm/lib/CodeGen/SplitKit.cpp | 10 ++-
llvm/lib/CodeGen/TargetInstrInfo.cpp | 3 +-
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 69 +++++----------------
llvm/lib/Target/AMDGPU/SIInstrInfo.h | 3 +-
llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp | 3 +-
llvm/lib/Target/ARM/ARMBaseInstrInfo.h | 3 +-
llvm/lib/Target/X86/X86InstrInfo.cpp | 3 +-
llvm/lib/Target/X86/X86InstrInfo.h | 3 +-
12 files changed, 60 insertions(+), 65 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/LiveRangeEdit.h b/llvm/include/llvm/CodeGen/LiveRangeEdit.h
index d0ed3ff660d9b..1dfacd041fa41 100644
--- a/llvm/include/llvm/CodeGen/LiveRangeEdit.h
+++ b/llvm/include/llvm/CodeGen/LiveRangeEdit.h
@@ -182,12 +182,15 @@ class LiveRangeEdit : private MachineRegisterInfo::Delegate {
/// instruction into MBB before MI. The new instruction is mapped, but
/// liveness is not updated. If ReplaceIndexMI is not null it will be replaced
/// by new MI in the index map.
+ /// \p UsedLanes is a bitmask of the lanes that are live at the
+ /// rematerialization point, forwarded to TII.reMaterialize.
/// Return the SlotIndex of the new instruction.
SlotIndex rematerializeAt(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI, Register DestReg,
const Remat &RM, const TargetRegisterInfo &,
bool Late = false, unsigned SubIdx = 0,
- MachineInstr *ReplaceIndexMI = nullptr);
+ MachineInstr *ReplaceIndexMI = nullptr,
+ LaneBitmask UsedLanes = LaneBitmask::getAll());
/// markRematerialized - explicitly mark a value as rematerialized after doing
/// it manually.
diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
index ed968d7bd4593..6a852ccec3993 100644
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -462,9 +462,12 @@ class LLVM_ABI TargetInstrInfo : public MCInstrInfo {
/// The register in Orig->getOperand(0).getReg() will be substituted by
/// DestReg:SubIdx. Any existing subreg index is preserved or composed with
/// SubIdx.
+ /// \p UsedLanes is a bitmask of the lanes that are live at the
+ /// rematerialization point.
virtual void reMaterialize(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI, Register DestReg,
- unsigned SubIdx, const MachineInstr &Orig) const;
+ unsigned SubIdx, const MachineInstr &Orig,
+ LaneBitmask UsedLanes = LaneBitmask::getAll()) const;
/// Clones instruction or the whole instruction bundle \p Orig and
/// insert into \p MBB before \p InsertBefore. The target may update operands
diff --git a/llvm/lib/CodeGen/InlineSpiller.cpp b/llvm/lib/CodeGen/InlineSpiller.cpp
index 7dd629a0962d5..668c7c0a78098 100644
--- a/llvm/lib/CodeGen/InlineSpiller.cpp
+++ b/llvm/lib/CodeGen/InlineSpiller.cpp
@@ -726,9 +726,18 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) {
// Constrain it to the register class of MI.
MRI.constrainRegClass(NewVReg, MRI.getRegClass(VirtReg.reg()));
+ // Compute which lanes of the virtual register are live at the use point.
+ LaneBitmask UsedLanes = LaneBitmask::getAll();
+ if (VirtReg.hasSubRanges()) {
+ UsedLanes = LaneBitmask::getNone();
+ for (const LiveInterval::SubRange &SR : VirtReg.subranges())
+ if (SR.liveAt(UseIdx))
+ UsedLanes |= SR.LaneMask;
+ }
+
// Finally we can rematerialize OrigMI before MI.
- SlotIndex DefIdx =
- Edit->rematerializeAt(*MI.getParent(), MI, NewVReg, RM, TRI);
+ SlotIndex DefIdx = Edit->rematerializeAt(*MI.getParent(), MI, NewVReg, RM,
+ TRI, false, 0, nullptr, UsedLanes);
// We take the DebugLoc from MI, since OrigMI may be attributed to a
// different source location.
diff --git a/llvm/lib/CodeGen/LiveRangeEdit.cpp b/llvm/lib/CodeGen/LiveRangeEdit.cpp
index 6fe11704a9137..9b21d8681c81d 100644
--- a/llvm/lib/CodeGen/LiveRangeEdit.cpp
+++ b/llvm/lib/CodeGen/LiveRangeEdit.cpp
@@ -86,9 +86,10 @@ SlotIndex LiveRangeEdit::rematerializeAt(MachineBasicBlock &MBB,
Register DestReg, const Remat &RM,
const TargetRegisterInfo &tri,
bool Late, unsigned SubIdx,
- MachineInstr *ReplaceIndexMI) {
+ MachineInstr *ReplaceIndexMI,
+ LaneBitmask UsedLanes) {
assert(RM.OrigMI && "Invalid remat");
- TII.reMaterialize(MBB, MI, DestReg, SubIdx, *RM.OrigMI);
+ TII.reMaterialize(MBB, MI, DestReg, SubIdx, *RM.OrigMI, UsedLanes);
// DestReg of the cloned instruction cannot be Dead. Set isDead of DestReg
// to false anyway in case the isDead flag of RM.OrigMI's dest register
// is true.
diff --git a/llvm/lib/CodeGen/SplitKit.cpp b/llvm/lib/CodeGen/SplitKit.cpp
index 0834e23a98efc..582016482389c 100644
--- a/llvm/lib/CodeGen/SplitKit.cpp
+++ b/llvm/lib/CodeGen/SplitKit.cpp
@@ -643,7 +643,15 @@ VNInfo *SplitEditor::defFromParent(unsigned RegIdx, const VNInfo *ParentVNI,
if (RM.OrigMI && TII.isAsCheapAsAMove(*RM.OrigMI) &&
Edit->canRematerializeAt(RM, UseIdx)) {
if (!rematWillIncreaseRestriction(RM.OrigMI, MBB, UseIdx)) {
- SlotIndex Def = Edit->rematerializeAt(MBB, I, Reg, RM, TRI, Late);
+ LaneBitmask UsedLanes = LaneBitmask::getAll();
+ if (OrigLI.hasSubRanges()) {
+ UsedLanes = LaneBitmask::getNone();
+ for (const LiveInterval::SubRange &SR : OrigLI.subranges())
+ if (SR.liveAt(UseIdx))
+ UsedLanes |= SR.LaneMask;
+ }
+ SlotIndex Def = Edit->rematerializeAt(MBB, I, Reg, RM, TRI, Late,
+ 0, nullptr, UsedLanes);
++NumRemats;
// Define the value in Reg.
return defValue(RegIdx, ParentVNI, Def, false);
diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp
index 8b26ab8da124b..3e3b935135350 100644
--- a/llvm/lib/CodeGen/TargetInstrInfo.cpp
+++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp
@@ -447,7 +447,8 @@ bool TargetInstrInfo::getStackSlotRange(const TargetRegisterClass *RC,
void TargetInstrInfo::reMaterialize(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
Register DestReg, unsigned SubIdx,
- const MachineInstr &Orig) const {
+ const MachineInstr &Orig,
+ LaneBitmask UsedLanes) const {
MachineInstr *MI = MBB.getParent()->CloneMachineInstr(&Orig);
MI->substituteRegister(MI->getOperand(0).getReg(), DestReg, SubIdx, TRI);
MBB.insert(I, MI);
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index c8003e4bae9a2..f718044d089e8 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2610,7 +2610,8 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
void SIInstrInfo::reMaterialize(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, Register DestReg,
unsigned SubIdx,
- const MachineInstr &Orig) const {
+ const MachineInstr &Orig,
+ LaneBitmask UsedLanes) const {
// Try shrinking the instruction to remat only the part needed for current
// context.
@@ -2622,65 +2623,29 @@ void SIInstrInfo::reMaterialize(MachineBasicBlock &MBB,
if (SubIdx != 0)
break;
- if (I == MBB.end())
- break;
-
- if (I->isBundled())
- break;
-
if (!Orig.getOperand(1).isImm())
break;
- // Shrink S_MOV_B64 to S_MOV_B32 when the use at the insertion point
- // only needs a single 32-bit subreg of the defined value.
-
- // Scan all uses of the original register from the insertion point
- // and verify that all uses in the same live range read the same lane.
- // Stop at a def of RegToFind since that starts a new live
- // range whose uses won't be rewritten to our DestReg.
- Register RegToFind = Orig.getOperand(0).getReg();
- unsigned UseSubReg = AMDGPU::NoSubRegister;
-
- [&]() {
- for (auto It = I; It != MBB.end(); ++It) {
- for (auto &MO : It->operands()) {
- // Skip irrelevant registers
- if (!MO.isReg() || MO.getReg() != RegToFind)
- continue;
-
- // Stop at a new live range
- if (MO.isDef())
- return;
-
- if (UseSubReg == AMDGPU::NoSubRegister) {
- UseSubReg = MO.getSubReg();
- continue;
- }
+ // Shrink S_MOV_B64 to S_MOV_B32 when UsedLanes indicates only a single
+ // 32-bit lane of the 64-bit value is live at the rematerialization point.
+ if (UsedLanes.all())
+ break;
- // Bail out if subregs do not match between uses
- if (MO.getSubReg() != UseSubReg) {
- UseSubReg = AMDGPU::NoSubRegister;
- return;
- }
- }
- }
- }();
+ // Determine which half of the 64-bit immediate corresponds to the use.
+ unsigned OrigSubReg = Orig.getOperand(0).getSubReg();
+ unsigned LoSubReg = RI.composeSubRegIndices(OrigSubReg, AMDGPU::sub0);
+ unsigned HiSubReg = RI.composeSubRegIndices(OrigSubReg, AMDGPU::sub1);
- if (UseSubReg == AMDGPU::NoSubRegister)
- break;
+ bool NeedLo = (UsedLanes & RI.getSubRegIndexLaneMask(LoSubReg)).any();
+ bool NeedHi = (UsedLanes & RI.getSubRegIndexLaneMask(HiSubReg)).any();
- if (RI.getSubRegIdxSize(UseSubReg) != 32)
+ if (NeedLo && NeedHi)
break;
- // Determine which half of the 64-bit immediate corresponds to the use.
- unsigned UseOffset = RI.getSubRegIdxOffset(UseSubReg);
- unsigned OrigSubReg = Orig.getOperand(0).getSubReg();
- unsigned DefOffset =
- (OrigSubReg == AMDGPU::NoSubRegister)
- ? 0
- : RI.getSubRegIdxOffset(Orig.getOperand(0).getSubReg());
int64_t Imm64 = Orig.getOperand(1).getImm();
- int32_t Imm32 = (UseOffset == DefOffset) ? Lo_32(Imm64) : Hi_32(Imm64);
+ int32_t Imm32 = NeedLo ? Lo_32(Imm64) : Hi_32(Imm64);
+
+ unsigned UseSubReg = NeedLo ? LoSubReg : HiSubReg;
// Emit S_MOV_B32 defining just the needed 32-bit subreg of DestReg.
BuildMI(MBB, I, Orig.getDebugLoc(), get(AMDGPU::S_MOV_B32))
@@ -2762,7 +2727,7 @@ void SIInstrInfo::reMaterialize(MachineBasicBlock &MBB,
break;
}
- TargetInstrInfo::reMaterialize(MBB, I, DestReg, SubIdx, Orig);
+ TargetInstrInfo::reMaterialize(MBB, I, DestReg, SubIdx, Orig, UsedLanes);
}
std::pair<MachineInstr*, MachineInstr*>
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index dd4c97f0c5746..dd3e0b71bc198 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -334,7 +334,8 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
Register DestReg, unsigned SubIdx,
- const MachineInstr &Orig) const override;
+ const MachineInstr &Orig,
+ LaneBitmask UsedLanes = LaneBitmask::getAll()) const override;
// Splits a V_MOV_B64_DPP_PSEUDO opcode into a pair of v_mov_b32_dpp
// instructions. Returns a pair of generated instructions.
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index bc5a89bc5d7f4..79ba2aee1a1bb 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -1661,7 +1661,8 @@ static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
void ARMBaseInstrInfo::reMaterialize(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
Register DestReg, unsigned SubIdx,
- const MachineInstr &Orig) const {
+ const MachineInstr &Orig,
+ LaneBitmask UsedLanes) const {
unsigned Opcode = Orig.getOpcode();
switch (Opcode) {
default: {
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
index 2818f13c5d000..2e8bfacf80075 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -231,7 +231,8 @@ class ARMBaseInstrInfo : public ARMGenInstrInfo {
void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
Register DestReg, unsigned SubIdx,
- const MachineInstr &Orig) const override;
+ const MachineInstr &Orig,
+ LaneBitmask UsedLanes = LaneBitmask::getAll()) const override;
MachineInstr &
duplicate(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 2479a8dccfb00..e98ff9bf8f435 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -961,7 +961,8 @@ bool X86InstrInfo::isReMaterializableImpl(
void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
Register DestReg, unsigned SubIdx,
- const MachineInstr &Orig) const {
+ const MachineInstr &Orig,
+ LaneBitmask UsedLanes) const {
bool ClobbersEFLAGS = Orig.modifiesRegister(X86::EFLAGS, &TRI);
if (ClobbersEFLAGS && MBB.computeRegisterLiveness(&TRI, X86::EFLAGS, I) !=
MachineBasicBlock::LQR_Dead) {
diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h
index de8ccb44578a3..43ab9b37ce4c8 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/llvm/lib/Target/X86/X86InstrInfo.h
@@ -342,7 +342,8 @@ class X86InstrInfo final : public X86GenInstrInfo {
bool isReMaterializableImpl(const MachineInstr &MI) const override;
void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
Register DestReg, unsigned SubIdx,
- const MachineInstr &Orig) const override;
+ const MachineInstr &Orig,
+ LaneBitmask UsedLanes = LaneBitmask::getAll()) const override;
/// Given an operand within a MachineInstr, insert preceding code to put it
/// into the right format for a particular kind of LEA instruction. This may
More information about the llvm-commits mailing list