[llvm] r243010 - [X86] Allow load folding into PUSH instructions

Kuperstein, Michael M michael.m.kuperstein at intel.com
Sun Jul 26 00:11:16 PDT 2015


Yes.
I’m open to suggestions about the name, btw – I’m not at all attached to having “msrom” in it.

Michael

From: Sean Silva [mailto:chisophugis at gmail.com]
Sent: Friday, July 24, 2015 07:29
To: Kuperstein, Michael M
Cc: llvm-commits at cs.uiuc.edu
Subject: Re: [llvm] r243010 - [X86] Allow load folding into PUSH instructions



On Thu, Jul 23, 2015 at 5:23 AM, Michael Kuperstein <michael.m.kuperstein at intel.com<mailto:michael.m.kuperstein at intel.com>> wrote:
Author: mkuper
Date: Thu Jul 23 07:23:45 2015
New Revision: 243010

URL: http://llvm.org/viewvc/llvm-project?rev=243010&view=rev
Log:
[X86] Allow load folding into PUSH instructions

Adds pushes to the folding tables.
This also required a fix to the TD definition, since the memory forms of
the push instructions did not have the right mayLoad/mayStore flags.

Differential Revision: http://reviews.llvm.org/D11340

Added:
    llvm/trunk/test/CodeGen/X86/fold-push.ll
Modified:
    llvm/trunk/lib/Target/X86/X86.td
    llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
    llvm/trunk/lib/Target/X86/X86InstrInfo.td

Modified: llvm/trunk/lib/Target/X86/X86.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86.td?rev=243010&r1=243009&r2=243010&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86.td (original)
+++ llvm/trunk/lib/Target/X86/X86.td Thu Jul 23 07:23:45 2015
@@ -181,6 +181,11 @@ def FeatureSlowDivide64 : SubtargetFeatu
 def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions",
                                      "PadShortFunctions", "true",
                                      "Pad short functions">;
+// TODO: This feature ought to be renamed.
+// What it really refers to are CPUs where instruction that cause MSROM
+// lookups are expensive, so alternative sequences should be preferred.

What is "MSROM" in this context? Do you just mean that the instruction is microcoded?

-- Sean Silva

+// The best examples of this are the memory forms of CALL and PUSH
+// instructions, which should be avoided in favor of a MOV + register CALL/PUSH.
 def FeatureCallRegIndirect : SubtargetFeature<"call-reg-indirect",
                                      "CallRegIndirect", "true",
                                      "Call register indirect">;

Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=243010&r1=243009&r2=243010&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Thu Jul 23 07:23:45 2015
@@ -332,6 +332,9 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
     { X86::MUL8r,       X86::MUL8m,         TB_FOLDED_LOAD },
     { X86::PEXTRDrr,    X86::PEXTRDmr,      TB_FOLDED_STORE },
     { X86::PEXTRQrr,    X86::PEXTRQmr,      TB_FOLDED_STORE },
+    { X86::PUSH16r,     X86::PUSH16rmm,     TB_FOLDED_LOAD },
+    { X86::PUSH32r,     X86::PUSH32rmm,     TB_FOLDED_LOAD },
+    { X86::PUSH64r,     X86::PUSH64rmm,     TB_FOLDED_LOAD },
     { X86::SETAEr,      X86::SETAEm,        TB_FOLDED_STORE },
     { X86::SETAr,       X86::SETAm,         TB_FOLDED_STORE },
     { X86::SETBEr,      X86::SETBEm,        TB_FOLDED_STORE },
@@ -4878,10 +4881,14 @@ MachineInstr *X86InstrInfo::foldMemoryOp
   bool isCallRegIndirect = Subtarget.callRegIndirect();
   bool isTwoAddrFold = false;

-  // For CPUs that favor the register form of a call,
-  // do not fold loads into calls.
-  if (isCallRegIndirect &&
-    (MI->getOpcode() == X86::CALL32r || MI->getOpcode() == X86::CALL64r))
+  // For CPUs that favor the register form of a call or push,
+  // do not fold loads into calls or pushes, unless optimizing for size
+  // aggressively.
+  if (isCallRegIndirect &&
+      !MF.getFunction()->hasFnAttribute(Attribute::MinSize) &&
+      (MI->getOpcode() == X86::CALL32r || MI->getOpcode() == X86::CALL64r ||
+       MI->getOpcode() == X86::PUSH16r || MI->getOpcode() == X86::PUSH32r ||
+       MI->getOpcode() == X86::PUSH64r))
     return nullptr;

   unsigned NumOps = MI->getDesc().getNumOperands();

Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.td?rev=243010&r1=243009&r2=243010&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.td Thu Jul 23 07:23:45 2015
@@ -1022,12 +1022,8 @@ def PUSH32r  : I<0x50, AddRegFrm, (outs)
                  IIC_PUSH_REG>, OpSize32, Requires<[Not64BitMode]>;
 def PUSH16rmr: I<0xFF, MRM6r, (outs), (ins GR16:$reg), "push{w}\t$reg",[],
                  IIC_PUSH_REG>, OpSize16;
-def PUSH16rmm: I<0xFF, MRM6m, (outs), (ins i16mem:$src), "push{w}\t$src",[],
-                 IIC_PUSH_MEM>, OpSize16;
 def PUSH32rmr: I<0xFF, MRM6r, (outs), (ins GR32:$reg), "push{l}\t$reg",[],
                  IIC_PUSH_REG>, OpSize32, Requires<[Not64BitMode]>;
-def PUSH32rmm: I<0xFF, MRM6m, (outs), (ins i32mem:$src), "push{l}\t$src",[],
-                 IIC_PUSH_MEM>, OpSize32, Requires<[Not64BitMode]>;

 def PUSH16i8 : Ii8<0x6a, RawFrm, (outs), (ins i16i8imm:$imm),
                    "push{w}\t$imm", [], IIC_PUSH_IMM>, OpSize16;
@@ -1041,6 +1037,14 @@ def PUSHi32  : Ii32<0x68, RawFrm, (outs)
                    "push{l}\t$imm", [], IIC_PUSH_IMM>, OpSize32,
                    Requires<[Not64BitMode]>;
 } // mayStore, SchedRW
+
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteRMW] in {
+def PUSH16rmm: I<0xFF, MRM6m, (outs), (ins i16mem:$src), "push{w}\t$src",[],
+                 IIC_PUSH_MEM>, OpSize16;
+def PUSH32rmm: I<0xFF, MRM6m, (outs), (ins i32mem:$src), "push{l}\t$src",[],
+                 IIC_PUSH_MEM>, OpSize32, Requires<[Not64BitMode]>;
+} // mayLoad, mayStore, SchedRW
+
 }

 let Defs = [ESP, EFLAGS], Uses = [ESP], mayLoad = 1, hasSideEffects=0,
@@ -1073,9 +1077,11 @@ def PUSH64r  : I<0x50, AddRegFrm, (outs)
                  IIC_PUSH_REG>, OpSize32, Requires<[In64BitMode]>;
 def PUSH64rmr: I<0xFF, MRM6r, (outs), (ins GR64:$reg), "push{q}\t$reg", [],
                  IIC_PUSH_REG>, OpSize32, Requires<[In64BitMode]>;
+} // mayStore, SchedRW
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteRMW] in {
 def PUSH64rmm: I<0xFF, MRM6m, (outs), (ins i64mem:$src), "push{q}\t$src", [],
                  IIC_PUSH_MEM>, OpSize32, Requires<[In64BitMode]>;
-} // mayStore, SchedRW
+} // mayLoad, mayStore, SchedRW
 }

 let Defs = [RSP], Uses = [RSP], hasSideEffects = 0, mayStore = 1,

Added: llvm/trunk/test/CodeGen/X86/fold-push.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fold-push.ll?rev=243010&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fold-push.ll (added)
+++ llvm/trunk/test/CodeGen/X86/fold-push.ll Thu Jul 23 07:23:45 2015
@@ -0,0 +1,40 @@
+; RUN: llc < %s -mtriple=i686-windows | FileCheck %s -check-prefix=CHECK -check-prefix=NORMAL
+; RUN: llc < %s -mtriple=i686-windows -mattr=call-reg-indirect | FileCheck %s -check-prefix=CHECK -check-prefix=SLM
+
+declare void @foo(i32 %r)
+
+define void @test(i32 %a, i32 %b) optsize {
+; CHECK-LABEL: test:
+; CHECK: movl [[EAX:%e..]], (%esp)
+; CHECK-NEXT: pushl [[EAX]]
+; CHECK-NEXT: calll
+; CHECK-NEXT: addl $4, %esp
+; CHECK: nop
+; NORMAL: pushl (%esp)
+; SLM: movl (%esp), [[RELOAD:%e..]]
+; SLM-NEXT: pushl [[RELOAD]]
+; CHECK: calll
+; CHECK-NEXT: addl $4, %esp
+  %c = add i32 %a, %b
+  call void @foo(i32 %c)
+  call void asm sideeffect "nop", "~{ax},~{bx},~{cx},~{dx},~{bp},~{si},~{di}"()
+  call void @foo(i32 %c)
+  ret void
+}
+
+define void @test_min(i32 %a, i32 %b) minsize {
+; CHECK-LABEL: test_min:
+; CHECK: movl [[EAX:%e..]], (%esp)
+; CHECK-NEXT: pushl [[EAX]]
+; CHECK-NEXT: calll
+; CHECK-NEXT: addl $4, %esp
+; CHECK: nop
+; CHECK: pushl (%esp)
+; CHECK: calll
+; CHECK-NEXT: addl $4, %esp
+  %c = add i32 %a, %b
+  call void @foo(i32 %c)
+  call void asm sideeffect "nop", "~{ax},~{bx},~{cx},~{dx},~{bp},~{si},~{di}"()
+  call void @foo(i32 %c)
+  ret void
+}


_______________________________________________
llvm-commits mailing list
llvm-commits at cs.uiuc.edu<mailto:llvm-commits at cs.uiuc.edu>
http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits

---------------------------------------------------------------------
Intel Israel (74) Limited

This e-mail and any attachments may contain confidential material for
the sole use of the intended recipient(s). Any review or distribution
by others is strictly prohibited. If you are not the intended
recipient, please contact the sender and delete all copies.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20150726/40738fde/attachment.html>


More information about the llvm-commits mailing list