[llvm-commits] [llvm] r139854 - /llvm/trunk/lib/Target/X86/X86InstrInfo.cpp

Chris Lattner clattner at apple.com
Thu Sep 15 15:55:23 PDT 2011


On Sep 15, 2011, at 2:42 PM, Bruno Cardoso Lopes wrote:

> Author: bruno
> Date: Thu Sep 15 16:42:23 2011
> New Revision: 139854
> 
> URL: http://llvm.org/viewvc/llvm-project?rev=139854&view=rev
> Log:
> Factor out partial register update checks for some SSE instructions.
> Also add the AVX versions and add comments!

Hi Bruno,

I know it isn't your fault, but could hasPartialRegUpdate become a "TSFlag" bit so it can be in the .td files instead of .cpp code?
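
For what it's worth, here is a rough sketch of how that could look. The field name and the bit position are hypothetical, just to illustrate the idea; they aren't taken from the actual TSFlags layout:

  // X86InstrFormats.td: one new bit in the X86Inst base class,
  // threaded into TSFlags (assumes some bit, say 38, is free).
  bit hasPartialRegUpdate = 0;
  let TSFlags{38} = hasPartialRegUpdate;

  // X86InstrSSE.td: set the bit on each affected definition, e.g.
  //   let hasPartialRegUpdate = 1 in def CVTSS2SDrr : ...;

  // X86InstrInfo.cpp: the opcode switch then collapses to a flag test.
  static bool hasPartialRegUpdate(const MCInstrDesc &Desc) {
    return Desc.TSFlags & (1ULL << 38);
  }

That way, new instructions like the AVX variants added here would pick up the behavior from their .td definitions instead of needing more cases in the switch.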

-Chris

> 
> Modified:
>    llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
> 
> Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=139854&r1=139853&r2=139854&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
> +++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Thu Sep 15 16:42:23 2011
> @@ -2558,6 +2558,49 @@
>   return NULL;
> }
> 
> +/// hasPartialRegUpdate - Return true for all instructions that only update
> +/// the first 32 or 64 bits of the destination register and leave the rest
> +/// unmodified. This can be used to avoid folding a load into instructions
> +/// that only update part of the destination register when the non-updated
> +/// part is not needed, e.g. cvtss2sd and sqrtss. Unfolding the load from
> +/// these instructions breaks the partial register dependency, which can
> +/// improve performance, e.g.:
> +///
> +///   movss (%rdi), %xmm0
> +///   cvtss2sd %xmm0, %xmm0
> +///
> +/// Instead of
> +///   cvtss2sd (%rdi), %xmm0
> +///
> +static bool hasPartialRegUpdate(unsigned Opcode) {
> +  switch (Opcode) {
> +  case X86::CVTSD2SSrr:
> +  case X86::Int_CVTSD2SSrr:
> +  case X86::CVTSS2SDrr:
> +  case X86::Int_CVTSS2SDrr:
> +  case X86::RCPSSr:
> +  case X86::RCPSSr_Int:
> +  case X86::ROUNDSDr:
> +  case X86::ROUNDSSr:
> +  case X86::RSQRTSSr:
> +  case X86::RSQRTSSr_Int:
> +  case X86::SQRTSSr:
> +  case X86::SQRTSSr_Int:
> +  // AVX encoded versions
> +  case X86::VCVTSD2SSrr:
> +  case X86::Int_VCVTSD2SSrr:
> +  case X86::VCVTSS2SDrr:
> +  case X86::Int_VCVTSS2SDrr:
> +  case X86::VRCPSSr:
> +  case X86::VROUNDSDr:
> +  case X86::VROUNDSSr:
> +  case X86::VRSQRTSSr:
> +  case X86::VSQRTSSr:
> +    return true;
> +  }
> +
> +  return false;
> +}
> 
> MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
>                                                   MachineInstr *MI,
> @@ -2566,22 +2609,11 @@
>   // Check switch flag
>   if (NoFusing) return NULL;
> 
> -  if (!MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize))
> -    switch (MI->getOpcode()) {
> -    case X86::CVTSD2SSrr:
> -    case X86::Int_CVTSD2SSrr:
> -    case X86::CVTSS2SDrr:
> -    case X86::Int_CVTSS2SDrr:
> -    case X86::RCPSSr:
> -    case X86::RCPSSr_Int:
> -    case X86::ROUNDSDr:
> -    case X86::ROUNDSSr:
> -    case X86::RSQRTSSr:
> -    case X86::RSQRTSSr_Int:
> -    case X86::SQRTSSr:
> -    case X86::SQRTSSr_Int:
> -      return 0;
> -    }
> +  // Unless optimizing for size, don't fold loads into these
> +  // instructions, so as to avoid partial register update stalls.
> +  if (!MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize) &&
> +      hasPartialRegUpdate(MI->getOpcode()))
> +    return 0;
> 
>   const MachineFrameInfo *MFI = MF.getFrameInfo();
>   unsigned Size = MFI->getObjectSize(FrameIndex);
> @@ -2618,22 +2650,11 @@
>   // Check switch flag
>   if (NoFusing) return NULL;
> 
> -  if (!MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize))
> -    switch (MI->getOpcode()) {
> -    case X86::CVTSD2SSrr:
> -    case X86::Int_CVTSD2SSrr:
> -    case X86::CVTSS2SDrr:
> -    case X86::Int_CVTSS2SDrr:
> -    case X86::RCPSSr:
> -    case X86::RCPSSr_Int:
> -    case X86::ROUNDSDr:
> -    case X86::ROUNDSSr:
> -    case X86::RSQRTSSr:
> -    case X86::RSQRTSSr_Int:
> -    case X86::SQRTSSr:
> -    case X86::SQRTSSr_Int:
> -      return 0;
> -    }
> +  // Unless optimizing for size, don't fold loads into these
> +  // instructions, so as to avoid partial register update stalls.
> +  if (!MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize) &&
> +      hasPartialRegUpdate(MI->getOpcode()))
> +    return 0;
> 
>   // Determine the alignment of the load.
>   unsigned Alignment = 0;
> 
> 