[llvm] r287792 - [X86] Allow folding of stack reloads when loading a subreg of the spilled reg
Philip Reames via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 24 19:50:53 PST 2016
On 11/23/2016 10:33 AM, Michael Kuperstein via llvm-commits wrote:
> Author: mkuper
> Date: Wed Nov 23 12:33:49 2016
> New Revision: 287792
>
> URL: http://llvm.org/viewvc/llvm-project?rev=287792&view=rev
> Log:
> [X86] Allow folding of stack reloads when loading a subreg of the spilled reg
>
> We previously did not support subregs in InlineSpiller::foldMemoryOperand()
> because targets might not handle them correctly.
>
> This adds a target hook that lets the spiller know a target can handle subregs,
> and enables it for X86 for the case of stack-slot reloads.
> This fixes PR30832.
This feels like a weird design. If I remember correctly, foldMemoryOperand
is allowed to do nothing if it doesn't know how to fold. Given that, why not
just update the in-tree targets to check for a sub-reg load and bail out?
Why do we need yet another target hook?
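
(For concreteness, the alternative I have in mind is just an early bail-out at
the top of each in-tree target's foldMemoryOperandImpl. A rough sketch only,
shown for a hypothetical FooInstrInfo and modulo the exact signature:

    MachineInstr *FooInstrInfo::foldMemoryOperandImpl(
        MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
        MachineBasicBlock::iterator InsertPt, int FrameIndex,
        LiveIntervals *LIS) const {
      // A target that can't fold through subreg operands simply declines;
      // the spiller then falls back to an explicit reload/spill.
      for (unsigned Idx : Ops)
        if (MI.getOperand(Idx).getSubReg())
          return nullptr;
      // ... existing target-specific folding logic ...
      return nullptr;
    }

The X86 hunk further down already does a narrower version of this check,
bailing only on subreg spills and high-byte subreg reloads.)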
>
> Differential Revision: https://reviews.llvm.org/D26521
>
> Modified:
> llvm/trunk/include/llvm/Target/TargetInstrInfo.h
> llvm/trunk/lib/CodeGen/InlineSpiller.cpp
> llvm/trunk/lib/CodeGen/TargetInstrInfo.cpp
> llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
> llvm/trunk/lib/Target/X86/X86InstrInfo.h
> llvm/trunk/test/CodeGen/X86/partial-fold32.ll
> llvm/trunk/test/CodeGen/X86/partial-fold64.ll
> llvm/trunk/test/CodeGen/X86/vector-half-conversions.ll
>
> Modified: llvm/trunk/include/llvm/Target/TargetInstrInfo.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetInstrInfo.h?rev=287792&r1=287791&r2=287792&view=diff
> ==============================================================================
> --- llvm/trunk/include/llvm/Target/TargetInstrInfo.h (original)
> +++ llvm/trunk/include/llvm/Target/TargetInstrInfo.h Wed Nov 23 12:33:49 2016
> @@ -817,6 +817,20 @@ public:
> /// anything was changed.
> virtual bool expandPostRAPseudo(MachineInstr &MI) const { return false; }
>
> + /// Check whether the target can fold a load that feeds a subreg operand
> + /// (or a subreg operand that feeds a store).
> + /// For example, X86 may want to return true if it can fold
> + /// movl (%esp), %eax
> + /// subb %al, ...
> + /// Into:
> + /// subb (%esp), ...
> + ///
> + /// Ideally, we'd like the target implementation of foldMemoryOperand() to
> + /// reject subregs - but since this behavior used to be enforced in the
> + /// target-independent code, moving this responsibility to the targets
> + /// has the potential of causing nasty silent breakage in out-of-tree targets.
> + virtual bool isSubregFoldable() const { return false; }
> +
> /// Attempt to fold a load or store of the specified stack
> /// slot into the specified machine instruction for the specified operand(s).
> /// If this is possible, a new instruction is returned with the specified
>
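(Side note, mostly for out-of-tree maintainers: with this design, opting in is
just overriding the hook, plus making sure your foldMemoryOperandImpl actually
copes with subreg operands. For a hypothetical FooInstrInfo:

    // FooInstrInfo.h (hypothetical out-of-tree target): tell the spiller that
    // subreg operands may now reach foldMemoryOperandImpl.
    bool isSubregFoldable() const override { return true; }

Targets that don't override it keep the old, conservative behavior.)
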
> Modified: llvm/trunk/lib/CodeGen/InlineSpiller.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/InlineSpiller.cpp?rev=287792&r1=287791&r2=287792&view=diff
> ==============================================================================
> --- llvm/trunk/lib/CodeGen/InlineSpiller.cpp (original)
> +++ llvm/trunk/lib/CodeGen/InlineSpiller.cpp Wed Nov 23 12:33:49 2016
> @@ -739,9 +739,12 @@ foldMemoryOperand(ArrayRef<std::pair<Mac
> bool WasCopy = MI->isCopy();
> unsigned ImpReg = 0;
>
> - bool SpillSubRegs = (MI->getOpcode() == TargetOpcode::STATEPOINT ||
> - MI->getOpcode() == TargetOpcode::PATCHPOINT ||
> - MI->getOpcode() == TargetOpcode::STACKMAP);
> + // Spill subregs if the target allows it.
> + // We always want to spill subregs for stackmap/patchpoint pseudos.
> + bool SpillSubRegs = TII.isSubregFoldable() ||
> + MI->getOpcode() == TargetOpcode::STATEPOINT ||
> + MI->getOpcode() == TargetOpcode::PATCHPOINT ||
> + MI->getOpcode() == TargetOpcode::STACKMAP;
>
> // TargetInstrInfo::foldMemoryOperand only expects explicit, non-tied
> // operands.
> @@ -754,7 +757,7 @@ foldMemoryOperand(ArrayRef<std::pair<Mac
> ImpReg = MO.getReg();
> continue;
> }
> - // FIXME: Teach targets to deal with subregs.
> +
> if (!SpillSubRegs && MO.getSubReg())
> return false;
> // We cannot fold a load instruction into a def.
>
> Modified: llvm/trunk/lib/CodeGen/TargetInstrInfo.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/TargetInstrInfo.cpp?rev=287792&r1=287791&r2=287792&view=diff
> ==============================================================================
> --- llvm/trunk/lib/CodeGen/TargetInstrInfo.cpp (original)
> +++ llvm/trunk/lib/CodeGen/TargetInstrInfo.cpp Wed Nov 23 12:33:49 2016
> @@ -515,6 +515,31 @@ MachineInstr *TargetInstrInfo::foldMemor
> assert(MBB && "foldMemoryOperand needs an inserted instruction");
> MachineFunction &MF = *MBB->getParent();
>
> + // If we're not folding a load into a subreg, the size of the load is the
> + // size of the spill slot. But if we are, we need to figure out what the
> + // actual load size is.
> + int64_t MemSize = 0;
> + const MachineFrameInfo &MFI = MF.getFrameInfo();
> + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
> +
> + if (Flags & MachineMemOperand::MOStore) {
> + MemSize = MFI.getObjectSize(FI);
> + } else {
> + for (unsigned Idx : Ops) {
> + int64_t OpSize = MFI.getObjectSize(FI);
> +
> + if (auto SubReg = MI.getOperand(Idx).getSubReg()) {
> + unsigned SubRegSize = TRI->getSubRegIdxSize(SubReg);
> + if (SubRegSize > 0 && !(SubRegSize % 8))
> + OpSize = SubRegSize / 8;
> + }
> +
> + MemSize = std::max(MemSize, OpSize);
> + }
> + }
> +
> + assert(MemSize && "Did not expect a zero-sized stack slot");
> +
> MachineInstr *NewMI = nullptr;
>
> if (MI.getOpcode() == TargetOpcode::STACKMAP ||
> @@ -538,10 +563,9 @@ MachineInstr *TargetInstrInfo::foldMemor
> assert((!(Flags & MachineMemOperand::MOLoad) ||
> NewMI->mayLoad()) &&
> "Folded a use to a non-load!");
> - const MachineFrameInfo &MFI = MF.getFrameInfo();
> assert(MFI.getObjectOffset(FI) != -1);
> MachineMemOperand *MMO = MF.getMachineMemOperand(
> - MachinePointerInfo::getFixedStack(MF, FI), Flags, MFI.getObjectSize(FI),
> + MachinePointerInfo::getFixedStack(MF, FI), Flags, MemSize,
> MFI.getObjectAlignment(FI));
> NewMI->addMemOperand(MF, MMO);
>
> @@ -558,7 +582,6 @@ MachineInstr *TargetInstrInfo::foldMemor
>
> const MachineOperand &MO = MI.getOperand(1 - Ops[0]);
> MachineBasicBlock::iterator Pos = MI;
> - const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
>
> if (Flags == MachineMemOperand::MOStore)
> storeRegToStackSlot(*MBB, Pos, MO.getReg(), MO.isKill(), FI, RC, TRI);
>
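(To spell out the new MemSize computation with the numbers from the
partial-fold32.ll test below: the spill slot is a 4-byte GR32 slot and the
reload is folded through sub_8bit, so roughly, with illustrative constants
standing in for the MFI/TRI queries:

    int64_t SlotSize    = 4;  // MFI.getObjectSize(FI) for the 32-bit spill slot
    unsigned SubRegBits = 8;  // TRI->getSubRegIdxSize(X86::sub_8bit)
    int64_t OpSize = SlotSize;
    if (SubRegBits > 0 && !(SubRegBits % 8))
      OpSize = SubRegBits / 8; // 1 byte, hence "1-byte Folded Reload" in the test

which is why the folded memory operand is tagged as a 1-byte access rather
than with the full slot size.)
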
> Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=287792&r1=287791&r2=287792&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
> +++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Wed Nov 23 12:33:49 2016
> @@ -6843,6 +6843,14 @@ X86InstrInfo::foldMemoryOperandImpl(Mach
> if (!MF.getFunction()->optForSize() && hasPartialRegUpdate(MI.getOpcode()))
> return nullptr;
>
> + // Don't fold subreg spills, or reloads that use a high subreg.
> + for (auto Op : Ops) {
> + MachineOperand &MO = MI.getOperand(Op);
> + auto SubReg = MO.getSubReg();
> + if (SubReg && (MO.isDef() || SubReg == X86::sub_8bit_hi))
> + return nullptr;
> + }
> +
> const MachineFrameInfo &MFI = MF.getFrameInfo();
> unsigned Size = MFI.getObjectSize(FrameIndex);
> unsigned Alignment = MFI.getObjectAlignment(FrameIndex);
> @@ -6967,6 +6975,14 @@ MachineInstr *X86InstrInfo::foldMemoryOp
> MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
> MachineBasicBlock::iterator InsertPt, MachineInstr &LoadMI,
> LiveIntervals *LIS) const {
> +
> + // TODO: Support the case where LoadMI loads a wide register, but MI
> + // only uses a subreg.
> + for (auto Op : Ops) {
> + if (MI.getOperand(Op).getSubReg())
> + return nullptr;
> + }
> +
> // If loading from a FrameIndex, fold directly from the FrameIndex.
> unsigned NumOps = LoadMI.getDesc().getNumOperands();
> int FrameIndex;
>
> Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.h?rev=287792&r1=287791&r2=287792&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86InstrInfo.h (original)
> +++ llvm/trunk/lib/Target/X86/X86InstrInfo.h Wed Nov 23 12:33:49 2016
> @@ -378,6 +378,10 @@ public:
>
> bool expandPostRAPseudo(MachineInstr &MI) const override;
>
> + /// Check whether the target can fold a load that feeds a subreg operand
> + /// (or a subreg operand that feeds a store).
> + bool isSubregFoldable() const override { return true; }
> +
> /// foldMemoryOperand - If this target supports it, fold a load or store of
> /// the specified stack slot into the specified machine instruction for the
> /// specified operand(s). If this is possible, the target should perform the
>
> Modified: llvm/trunk/test/CodeGen/X86/partial-fold32.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/partial-fold32.ll?rev=287792&r1=287791&r2=287792&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/partial-fold32.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/partial-fold32.ll Wed Nov 23 12:33:49 2016
> @@ -3,8 +3,7 @@
> define fastcc i8 @fold32to8(i32 %add, i8 %spill) {
> ; CHECK-LABEL: fold32to8:
> ; CHECK: movl %ecx, (%esp) # 4-byte Spill
> -; CHECK: movl (%esp), %eax # 4-byte Reload
> -; CHECK: subb %al, %dl
> +; CHECK: subb (%esp), %dl # 1-byte Folded Reload
> entry:
> tail call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edi},~{esi},~{ebp},~{dirflag},~{fpsr},~{flags}"()
> %trunc = trunc i32 %add to i8
>
> Modified: llvm/trunk/test/CodeGen/X86/partial-fold64.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/partial-fold64.ll?rev=287792&r1=287791&r2=287792&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/partial-fold64.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/partial-fold64.ll Wed Nov 23 12:33:49 2016
> @@ -3,8 +3,7 @@
> define i32 @fold64to32(i64 %add, i32 %spill) {
> ; CHECK-LABEL: fold64to32:
> ; CHECK: movq %rdi, -{{[0-9]+}}(%rsp) # 8-byte Spill
> -; CHECK: movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload
> -; CHECK: subl %eax, %esi
> +; CHECK: subl -{{[0-9]+}}(%rsp), %esi # 4-byte Folded Reload
> entry:
> tail call void asm sideeffect "", "~{rax},~{rbx},~{rcx},~{rdx},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15},~{dirflag},~{fpsr},~{flags}"()
> %trunc = trunc i64 %add to i32
> @@ -15,8 +14,7 @@ entry:
> define i8 @fold64to8(i64 %add, i8 %spill) {
> ; CHECK-LABEL: fold64to8:
> ; CHECK: movq %rdi, -{{[0-9]+}}(%rsp) # 8-byte Spill
> -; CHECK: movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload
> -; CHECK: subb %al, %sil
> +; CHECK: subb -{{[0-9]+}}(%rsp), %sil # 1-byte Folded Reload
> entry:
> tail call void asm sideeffect "", "~{rax},~{rbx},~{rcx},~{rdx},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15},~{dirflag},~{fpsr},~{flags}"()
> %trunc = trunc i64 %add to i8
>
> Modified: llvm/trunk/test/CodeGen/X86/vector-half-conversions.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-half-conversions.ll?rev=287792&r1=287791&r2=287792&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/vector-half-conversions.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/vector-half-conversions.ll Wed Nov 23 12:33:49 2016
> @@ -4788,9 +4788,8 @@ define <8 x i16> @cvt_8f64_to_8i16(<8 x
> ; AVX1-NEXT: orl %ebx, %r14d
> ; AVX1-NEXT: shlq $32, %r14
> ; AVX1-NEXT: orq %r15, %r14
> -; AVX1-NEXT: vmovupd (%rsp), %ymm0 # 32-byte Reload
> -; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
> -; AVX1-NEXT: vzeroupper
> +; AVX1-NEXT: vpermilpd $1, (%rsp), %xmm0 # 16-byte Folded Reload
> +; AVX1-NEXT: # xmm0 = mem[1,0]
> ; AVX1-NEXT: callq __truncdfhf2
> ; AVX1-NEXT: movw %ax, %bx
> ; AVX1-NEXT: shll $16, %ebx
> @@ -4856,9 +4855,8 @@ define <8 x i16> @cvt_8f64_to_8i16(<8 x
> ; AVX2-NEXT: orl %ebx, %r14d
> ; AVX2-NEXT: shlq $32, %r14
> ; AVX2-NEXT: orq %r15, %r14
> -; AVX2-NEXT: vmovupd (%rsp), %ymm0 # 32-byte Reload
> -; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
> -; AVX2-NEXT: vzeroupper
> +; AVX2-NEXT: vpermilpd $1, (%rsp), %xmm0 # 16-byte Folded Reload
> +; AVX2-NEXT: # xmm0 = mem[1,0]
> ; AVX2-NEXT: callq __truncdfhf2
> ; AVX2-NEXT: movw %ax, %bx
> ; AVX2-NEXT: shll $16, %ebx
> @@ -5585,9 +5583,8 @@ define void @store_cvt_8f64_to_8i16(<8 x
> ; AVX1-NEXT: vzeroupper
> ; AVX1-NEXT: callq __truncdfhf2
> ; AVX1-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill
> -; AVX1-NEXT: vmovupd {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
> -; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
> -; AVX1-NEXT: vzeroupper
> +; AVX1-NEXT: vpermilpd $1, {{[0-9]+}}(%rsp), %xmm0 # 16-byte Folded Reload
> +; AVX1-NEXT: # xmm0 = mem[1,0]
> ; AVX1-NEXT: callq __truncdfhf2
> ; AVX1-NEXT: movl %eax, %r12d
> ; AVX1-NEXT: vmovupd {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
> @@ -5654,9 +5651,8 @@ define void @store_cvt_8f64_to_8i16(<8 x
> ; AVX2-NEXT: vzeroupper
> ; AVX2-NEXT: callq __truncdfhf2
> ; AVX2-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill
> -; AVX2-NEXT: vmovupd {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
> -; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
> -; AVX2-NEXT: vzeroupper
> +; AVX2-NEXT: vpermilpd $1, {{[0-9]+}}(%rsp), %xmm0 # 16-byte Folded Reload
> +; AVX2-NEXT: # xmm0 = mem[1,0]
> ; AVX2-NEXT: callq __truncdfhf2
> ; AVX2-NEXT: movl %eax, %r12d
> ; AVX2-NEXT: vmovupd {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
>
>