[PATCH] D14178: Alternative to long nops for X86 CPUs

Nadav Rotem via llvm-commits llvm-commits at lists.llvm.org
Fri Oct 30 21:47:11 PDT 2015


LGTM!

> On Oct 29, 2015, at 9:11 AM, Andrey Turetskiy <andrey.turetskiy at gmail.com> wrote:
> 
> aturetsk created this revision.
> aturetsk added a reviewer: nadav.
> aturetsk added subscribers: llvm-commits, zansari, anadolskiy, mkuper, DavidKreitzer.
> 
> Make X86AsmBackend generate smarter nops instead of a bunch of 0x90 for code alignment for CPUs which don't support long nop instructions.
> 
> http://reviews.llvm.org/D14178
> 
> Files:
>  lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
>  test/MC/X86/x86_nop.s
> 
> Index: test/MC/X86/x86_nop.s
> ===================================================================
> --- test/MC/X86/x86_nop.s
> +++ test/MC/X86/x86_nop.s
> @@ -22,13 +22,7 @@
> inc %eax
> 
> // CHECK: 0:	40                                           	incl	%eax
> -// CHECK: 1:	90                                           	nop
> -// CHECK: 2:	90                                           	nop
> -// CHECK: 3:	90                                           	nop
> -// CHECK: 4:	90                                           	nop
> -// CHECK: 5:	90                                           	nop
> -// CHECK: 6:	90                                           	nop
> -// CHECK: 7:	90                                           	nop
> +// CHECK: 1:	8d b4 26 00 00 00 00                            leal (%esi), %esi
> // CHECK: 8:	40                                           	incl	%eax
> 
> 
> Index: lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
> ===================================================================
> --- lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
> +++ lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
> @@ -69,15 +69,19 @@
> class X86AsmBackend : public MCAsmBackend {
>   const StringRef CPU;
>   bool HasNopl;
> -  const uint64_t MaxNopLength;
> +  uint64_t MaxNopLength;
> public:
> -  X86AsmBackend(const Target &T, StringRef CPU)
> -      : MCAsmBackend(), CPU(CPU), MaxNopLength(CPU == "slm" ? 7 : 15) {
> +  X86AsmBackend(const Target &T, StringRef CPU) : MCAsmBackend(), CPU(CPU) {
>     HasNopl = CPU != "generic" && CPU != "i386" && CPU != "i486" &&
>               CPU != "i586" && CPU != "pentium" && CPU != "pentium-mmx" &&
>               CPU != "i686" && CPU != "k6" && CPU != "k6-2" && CPU != "k6-3" &&
>               CPU != "geode" && CPU != "winchip-c6" && CPU != "winchip2" &&
>               CPU != "c3" && CPU != "c3-2";
> +    // Max length of true long nop instruction is 15 bytes.
> +    // Max length of long nop replacement instruction is 7 bytes.
> +    // Taking into account SilverMont architecture features max length of nops
> +    // is reduced for it to achieve better performance.
> +    MaxNopLength = (!HasNopl || CPU == "slm") ? 7 : 15;
>   }
> 
>   unsigned getNumFixupKinds() const override {
> @@ -279,7 +283,7 @@
> /// bytes.
> /// \return - true on success, false on failure
> bool X86AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
> -  static const uint8_t Nops[10][10] = {
> +  static const uint8_t TrueNops[10][10] = {
>     // nop
>     {0x90},
>     // xchg %ax,%ax
> @@ -302,17 +306,31 @@
>     {0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
>   };
> 
> -  // This CPU doesn't support long nops. If needed add more.
> -  // FIXME: Can we get this from the subtarget somehow?
> -  // FIXME: We could generated something better than plain 0x90.
> -  if (!HasNopl) {
> -    for (uint64_t i = 0; i < Count; ++i)
> -      OW->write8(0x90);
> -    return true;
> -  }
> +  // Alternative nop instructions for CPUs which don't support long nops.
> +  static const uint8_t AltNops[7][10] = {
> +      // nop
> +      {0x90},
> +      // xchg %ax,%ax
> +      {0x66, 0x90},
> +      // lea 0x0(%esi),%esi
> +      {0x8d, 0x76, 0x00},
> +      // lea 0x0(%esi),%esi
> +      {0x8d, 0x74, 0x26, 0x00},
> +      // nop + lea 0x0(%esi),%esi
> +      {0x90, 0x8d, 0x74, 0x26, 0x00},
> +      // lea 0x0(%esi),%esi
> +      {0x8d, 0xb6, 0x00, 0x00, 0x00, 0x00 },
> +      // lea 0x0(%esi),%esi
> +      {0x8d, 0xb4, 0x26, 0x00, 0x00, 0x00, 0x00},
> +  };
> +
> +  // Select the right NOP table.
> +  // FIXME: Can we get if CPU supports long nops from the subtarget somehow?
> +  const uint8_t (*Nops)[10] = HasNopl ? TrueNops : AltNops;
> +  assert(HasNopl || MaxNopLength <= 7);
> 
> -  // 15 is the longest single nop instruction.  Emit as many 15-byte nops as
> -  // needed, then emit a nop of the remaining length.
> +  // Emit as many largest nops as needed, then emit a nop of the remaining
> +  // length.
>   do {
>     const uint8_t ThisNopLength = (uint8_t) std::min(Count, MaxNopLength);
>     const uint8_t Prefixes = ThisNopLength <= 10 ? 0 : ThisNopLength - 10;
> 
> 
> <D14178.38742.patch>



More information about the llvm-commits mailing list