[llvm-commits] [llvm] r139854 - /llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
Chris Lattner
clattner at apple.com
Thu Sep 15 15:55:23 PDT 2011
On Sep 15, 2011, at 2:42 PM, Bruno Cardoso Lopes wrote:
> Author: bruno
> Date: Thu Sep 15 16:42:23 2011
> New Revision: 139854
>
> URL: http://llvm.org/viewvc/llvm-project?rev=139854&view=rev
> Log:
> Factor out partial register update checks for some SSE instructions.
> Also add the AVX versions and add comments!
Hi Bruno,
I know it isn't your fault, but could hasPartialRegUpdate become a "TSFlags" bit, so that it is specified in the .td files instead of in .cpp code?
-Chris
>
> Modified:
> llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
>
> Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=139854&r1=139853&r2=139854&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
> +++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Thu Sep 15 16:42:23 2011
> @@ -2558,6 +2558,49 @@
> return NULL;
> }
>
> +/// hasPartialRegUpdate - Return true for all instructions that only update
> +/// the first 32 or 64-bits of the destination register and leave the rest
> +/// unmodified. This can be used to avoid folding loads if the instructions
> +/// only update part of the destination register, and the non-updated part is
> +/// not needed. e.g. cvtss2sd, sqrtss. Unfolding the load from these
> +/// instructions breaks the partial register dependency and it can improve
> +/// performance. e.g.:
> +///
> +/// movss (%rdi), %xmm0
> +/// cvtss2sd %xmm0, %xmm0
> +///
> +/// Instead of
> +/// cvtss2sd (%rdi), %xmm0
> +///
> +static bool hasPartialRegUpdate(unsigned Opcode) {
> + switch (Opcode) {
> + case X86::CVTSD2SSrr:
> + case X86::Int_CVTSD2SSrr:
> + case X86::CVTSS2SDrr:
> + case X86::Int_CVTSS2SDrr:
> + case X86::RCPSSr:
> + case X86::RCPSSr_Int:
> + case X86::ROUNDSDr:
> + case X86::ROUNDSSr:
> + case X86::RSQRTSSr:
> + case X86::RSQRTSSr_Int:
> + case X86::SQRTSSr:
> + case X86::SQRTSSr_Int:
> + // AVX encoded versions
> + case X86::VCVTSD2SSrr:
> + case X86::Int_VCVTSD2SSrr:
> + case X86::VCVTSS2SDrr:
> + case X86::Int_VCVTSS2SDrr:
> + case X86::VRCPSSr:
> + case X86::VROUNDSDr:
> + case X86::VROUNDSSr:
> + case X86::VRSQRTSSr:
> + case X86::VSQRTSSr:
> + return true;
> + }
> +
> + return false;
> +}
>
> MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
> MachineInstr *MI,
> @@ -2566,22 +2609,11 @@
> // Check switch flag
> if (NoFusing) return NULL;
>
> - if (!MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize))
> - switch (MI->getOpcode()) {
> - case X86::CVTSD2SSrr:
> - case X86::Int_CVTSD2SSrr:
> - case X86::CVTSS2SDrr:
> - case X86::Int_CVTSS2SDrr:
> - case X86::RCPSSr:
> - case X86::RCPSSr_Int:
> - case X86::ROUNDSDr:
> - case X86::ROUNDSSr:
> - case X86::RSQRTSSr:
> - case X86::RSQRTSSr_Int:
> - case X86::SQRTSSr:
> - case X86::SQRTSSr_Int:
> - return 0;
> - }
> + // Unless optimizing for size, don't fold to avoid partial
> + // register update stalls
> + if (!MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize) &&
> + hasPartialRegUpdate(MI->getOpcode()))
> + return 0;
>
> const MachineFrameInfo *MFI = MF.getFrameInfo();
> unsigned Size = MFI->getObjectSize(FrameIndex);
> @@ -2618,22 +2650,11 @@
> // Check switch flag
> if (NoFusing) return NULL;
>
> - if (!MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize))
> - switch (MI->getOpcode()) {
> - case X86::CVTSD2SSrr:
> - case X86::Int_CVTSD2SSrr:
> - case X86::CVTSS2SDrr:
> - case X86::Int_CVTSS2SDrr:
> - case X86::RCPSSr:
> - case X86::RCPSSr_Int:
> - case X86::ROUNDSDr:
> - case X86::ROUNDSSr:
> - case X86::RSQRTSSr:
> - case X86::RSQRTSSr_Int:
> - case X86::SQRTSSr:
> - case X86::SQRTSSr_Int:
> - return 0;
> - }
> + // Unless optimizing for size, don't fold to avoid partial
> + // register update stalls
> + if (!MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize) &&
> + hasPartialRegUpdate(MI->getOpcode()))
> + return 0;
>
> // Determine the alignment of the load.
> unsigned Alignment = 0;
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
More information about the llvm-commits mailing list