[llvm] r317112 - [X86] Prevent fast isel from folding loads into the instructions listed in hasPartialRegUpdate.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Wed Nov 1 11:10:06 PDT 2017


Author: ctopper
Date: Wed Nov  1 11:10:06 2017
New Revision: 317112

URL: http://llvm.org/viewvc/llvm-project?rev=317112&view=rev
Log:
[X86] Prevent fast isel from folding loads into the instructions listed in hasPartialRegUpdate.

This patch moves the check for optsize and hasPartialRegUpdate into the lower-level implementation of foldMemoryOperandImpl so that it also covers the entry point used by fast-isel.

We're still folding undef register instructions in AVX, which we should probably also disable, but that's a problem for another patch.

Unfortunately, this requires reordering a bunch of functions which is why the diff is so large. I can do the function reordering separately if we want.

Differential Revision: https://reviews.llvm.org/D39402

Modified:
    llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
    llvm/trunk/test/CodeGen/X86/fast-isel-fptrunc-fpext.ll
    llvm/trunk/test/CodeGen/X86/fast-isel-int-float-conversion-x86-64.ll
    llvm/trunk/test/CodeGen/X86/fast-isel-int-float-conversion.ll

Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=317112&r1=317111&r2=317112&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Wed Nov  1 11:10:06 2017
@@ -8389,6 +8389,11 @@ MachineInstr *X86InstrInfo::foldMemoryOp
        MI.getOpcode() == X86::PUSH64r))
     return nullptr;
 
+  // Avoid partial register update stalls unless optimizing for size.
+  // TODO: we should block undef reg update as well.
+  if (!MF.getFunction()->optForSize() && hasPartialRegUpdate(MI.getOpcode()))
+    return nullptr;
+
   unsigned NumOps = MI.getDesc().getNumOperands();
   bool isTwoAddr =
       NumOps > 1 && MI.getDesc().getOperandConstraint(1, MCOI::TIED_TO) != -1;
@@ -8554,6 +8559,7 @@ X86InstrInfo::foldMemoryOperandImpl(Mach
 
   // Unless optimizing for size, don't fold to avoid partial
   // register update stalls
+  // TODO: we should block undef reg update as well.
   if (!MF.getFunction()->optForSize() && hasPartialRegUpdate(MI.getOpcode()))
     return nullptr;
 
@@ -8752,6 +8758,7 @@ MachineInstr *X86InstrInfo::foldMemoryOp
   if (NoFusing) return nullptr;
 
   // Avoid partial register update stalls unless optimizing for size.
+  // TODO: we should block undef reg update as well.
   if (!MF.getFunction()->optForSize() && hasPartialRegUpdate(MI.getOpcode()))
     return nullptr;
 

Modified: llvm/trunk/test/CodeGen/X86/fast-isel-fptrunc-fpext.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fast-isel-fptrunc-fpext.ll?rev=317112&r1=317111&r2=317112&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fast-isel-fptrunc-fpext.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fast-isel-fptrunc-fpext.ll Wed Nov  1 11:10:06 2017
@@ -55,7 +55,8 @@ entry:
 define double @single_to_double_rm(float* %x) {
 ; SSE-LABEL: single_to_double_rm:
 ; SSE:       # BB#0: # %entry
-; SSE-NEXT:    cvtss2sd (%rdi), %xmm0
+; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE-NEXT:    cvtss2sd %xmm0, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: single_to_double_rm:
@@ -69,16 +70,51 @@ entry:
   ret double %conv
 }
 
+define double @single_to_double_rm_optsize(float* %x) optsize {
+; SSE-LABEL: single_to_double_rm_optsize:
+; SSE:       # BB#0: # %entry
+; SSE-NEXT:    cvtss2sd (%rdi), %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: single_to_double_rm_optsize:
+; AVX:       # BB#0: # %entry
+; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
+; AVX-NEXT:    retq
+entry:
+  %0 = load float, float* %x, align 4
+  %conv = fpext float %0 to double
+  ret double %conv
+}
+
 define float @double_to_single_rm(double* %x) {
 ; SSE-LABEL: double_to_single_rm:
 ; SSE:       # BB#0: # %entry
-; SSE-NEXT:    cvtsd2ss (%rdi), %xmm0
+; SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; SSE-NEXT:    cvtsd2ss %xmm0, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: double_to_single_rm:
 ; AVX:       # BB#0: # %entry
 ; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
 ; AVX-NEXT:    vcvtsd2ss %xmm0, %xmm0, %xmm0
+; AVX-NEXT:    retq
+entry:
+  %0 = load double, double* %x, align 8
+  %conv = fptrunc double %0 to float
+  ret float %conv
+}
+
+define float @double_to_single_rm_optsize(double* %x) optsize {
+; SSE-LABEL: double_to_single_rm_optsize:
+; SSE:       # BB#0: # %entry
+; SSE-NEXT:    cvtsd2ss (%rdi), %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: double_to_single_rm_optsize:
+; AVX:       # BB#0: # %entry
+; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX-NEXT:    vcvtsd2ss %xmm0, %xmm0, %xmm0
 ; AVX-NEXT:    retq
 entry:
   %0 = load double, double* %x, align 8

Modified: llvm/trunk/test/CodeGen/X86/fast-isel-int-float-conversion-x86-64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fast-isel-int-float-conversion-x86-64.ll?rev=317112&r1=317111&r2=317112&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fast-isel-int-float-conversion-x86-64.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fast-isel-int-float-conversion-x86-64.ll Wed Nov  1 11:10:06 2017
@@ -21,7 +21,8 @@ entry:
 define double @long_to_double_rm(i64* %a) {
 ; SSE2-LABEL: long_to_double_rm:
 ; SSE2:       # BB#0: # %entry
-; SSE2-NEXT:    cvtsi2sdq (%rdi), %xmm0
+; SSE2-NEXT:    movq (%rdi), %rax
+; SSE2-NEXT:    cvtsi2sdq %rax, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; AVX-LABEL: long_to_double_rm:
@@ -34,6 +35,22 @@ entry:
   ret double %1
 }
 
+define double @long_to_double_rm_optsize(i64* %a) optsize {
+; SSE2-LABEL: long_to_double_rm_optsize:
+; SSE2:       # BB#0: # %entry
+; SSE2-NEXT:    cvtsi2sdq (%rdi), %xmm0
+; SSE2-NEXT:    retq
+;
+; AVX-LABEL: long_to_double_rm_optsize:
+; AVX:       # BB#0: # %entry
+; AVX-NEXT:    vcvtsi2sdq (%rdi), %xmm0, %xmm0
+; AVX-NEXT:    retq
+entry:
+  %0 = load i64, i64* %a
+  %1 = sitofp i64 %0 to double
+  ret double %1
+}
+
 define float @long_to_float_rr(i64 %a) {
 ; SSE2-LABEL: long_to_float_rr:
 ; SSE2:       # BB#0: # %entry
@@ -52,13 +69,30 @@ entry:
 define float @long_to_float_rm(i64* %a) {
 ; SSE2-LABEL: long_to_float_rm:
 ; SSE2:       # BB#0: # %entry
-; SSE2-NEXT:    cvtsi2ssq (%rdi), %xmm0
+; SSE2-NEXT:    movq (%rdi), %rax
+; SSE2-NEXT:    cvtsi2ssq %rax, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; AVX-LABEL: long_to_float_rm:
 ; AVX:       # BB#0: # %entry
 ; AVX-NEXT:    vcvtsi2ssq (%rdi), %xmm0, %xmm0
 ; AVX-NEXT:    retq
+entry:
+  %0 = load i64, i64* %a
+  %1 = sitofp i64 %0 to float
+  ret float %1
+}
+
+define float @long_to_float_rm_optsize(i64* %a) optsize {
+; SSE2-LABEL: long_to_float_rm_optsize:
+; SSE2:       # BB#0: # %entry
+; SSE2-NEXT:    cvtsi2ssq (%rdi), %xmm0
+; SSE2-NEXT:    retq
+;
+; AVX-LABEL: long_to_float_rm_optsize:
+; AVX:       # BB#0: # %entry
+; AVX-NEXT:    vcvtsi2ssq (%rdi), %xmm0, %xmm0
+; AVX-NEXT:    retq
 entry:
   %0 = load i64, i64* %a
   %1 = sitofp i64 %0 to float

Modified: llvm/trunk/test/CodeGen/X86/fast-isel-int-float-conversion.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fast-isel-int-float-conversion.ll?rev=317112&r1=317111&r2=317112&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fast-isel-int-float-conversion.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fast-isel-int-float-conversion.ll Wed Nov  1 11:10:06 2017
@@ -58,7 +58,8 @@ entry:
 define double @int_to_double_rm(i32* %a) {
 ; SSE2-LABEL: int_to_double_rm:
 ; SSE2:       # BB#0: # %entry
-; SSE2-NEXT:    cvtsi2sdl (%rdi), %xmm0
+; SSE2-NEXT:    movl (%rdi), %eax
+; SSE2-NEXT:    cvtsi2sdl %eax, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; AVX-LABEL: int_to_double_rm:
@@ -107,6 +108,58 @@ entry:
   ret double %1
 }
 
+define double @int_to_double_rm_optsize(i32* %a) optsize {
+; SSE2-LABEL: int_to_double_rm_optsize:
+; SSE2:       # BB#0: # %entry
+; SSE2-NEXT:    cvtsi2sdl (%rdi), %xmm0
+; SSE2-NEXT:    retq
+;
+; AVX-LABEL: int_to_double_rm_optsize:
+; AVX:       # BB#0: # %entry
+; AVX-NEXT:    vcvtsi2sdl (%rdi), %xmm0, %xmm0
+; AVX-NEXT:    retq
+;
+; SSE2_X86-LABEL: int_to_double_rm_optsize:
+; SSE2_X86:       # BB#0: # %entry
+; SSE2_X86-NEXT:    pushl %ebp
+; SSE2_X86-NEXT:    .cfi_def_cfa_offset 8
+; SSE2_X86-NEXT:    .cfi_offset %ebp, -8
+; SSE2_X86-NEXT:    movl %esp, %ebp
+; SSE2_X86-NEXT:    .cfi_def_cfa_register %ebp
+; SSE2_X86-NEXT:    andl $-8, %esp
+; SSE2_X86-NEXT:    subl $8, %esp
+; SSE2_X86-NEXT:    movl 8(%ebp), %eax
+; SSE2_X86-NEXT:    cvtsi2sdl (%eax), %xmm0
+; SSE2_X86-NEXT:    movsd %xmm0, (%esp)
+; SSE2_X86-NEXT:    fldl (%esp)
+; SSE2_X86-NEXT:    movl %ebp, %esp
+; SSE2_X86-NEXT:    popl %ebp
+; SSE2_X86-NEXT:    .cfi_def_cfa %esp, 4
+; SSE2_X86-NEXT:    retl
+;
+; AVX_X86-LABEL: int_to_double_rm_optsize:
+; AVX_X86:       # BB#0: # %entry
+; AVX_X86-NEXT:    pushl %ebp
+; AVX_X86-NEXT:    .cfi_def_cfa_offset 8
+; AVX_X86-NEXT:    .cfi_offset %ebp, -8
+; AVX_X86-NEXT:    movl %esp, %ebp
+; AVX_X86-NEXT:    .cfi_def_cfa_register %ebp
+; AVX_X86-NEXT:    andl $-8, %esp
+; AVX_X86-NEXT:    subl $8, %esp
+; AVX_X86-NEXT:    movl 8(%ebp), %eax
+; AVX_X86-NEXT:    vcvtsi2sdl (%eax), %xmm0, %xmm0
+; AVX_X86-NEXT:    vmovsd %xmm0, (%esp)
+; AVX_X86-NEXT:    fldl (%esp)
+; AVX_X86-NEXT:    movl %ebp, %esp
+; AVX_X86-NEXT:    popl %ebp
+; AVX_X86-NEXT:    .cfi_def_cfa %esp, 4
+; AVX_X86-NEXT:    retl
+entry:
+  %0 = load i32, i32* %a
+  %1 = sitofp i32 %0 to double
+  ret double %1
+}
+
 define float @int_to_float_rr(i32 %a) {
 ; SSE2-LABEL: int_to_float_rr:
 ; SSE2:       # BB#0: # %entry
@@ -148,7 +201,8 @@ entry:
 define float @int_to_float_rm(i32* %a) {
 ; SSE2-LABEL: int_to_float_rm:
 ; SSE2:       # BB#0: # %entry
-; SSE2-NEXT:    cvtsi2ssl (%rdi), %xmm0
+; SSE2-NEXT:    movl (%rdi), %eax
+; SSE2-NEXT:    cvtsi2ssl %eax, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; AVX-LABEL: int_to_float_rm:
@@ -172,6 +226,46 @@ define float @int_to_float_rm(i32* %a) {
 ; AVX_X86:       # BB#0: # %entry
 ; AVX_X86-NEXT:    pushl %eax
 ; AVX_X86-NEXT:    .cfi_def_cfa_offset 8
+; AVX_X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; AVX_X86-NEXT:    vcvtsi2ssl (%eax), %xmm0, %xmm0
+; AVX_X86-NEXT:    vmovss %xmm0, (%esp)
+; AVX_X86-NEXT:    flds (%esp)
+; AVX_X86-NEXT:    popl %eax
+; AVX_X86-NEXT:    .cfi_def_cfa_offset 4
+; AVX_X86-NEXT:    retl
+entry:
+  %0 = load i32, i32* %a
+  %1 = sitofp i32 %0 to float
+  ret float %1
+}
+
+define float @int_to_float_rm_optsize(i32* %a) optsize {
+; SSE2-LABEL: int_to_float_rm_optsize:
+; SSE2:       # BB#0: # %entry
+; SSE2-NEXT:    cvtsi2ssl (%rdi), %xmm0
+; SSE2-NEXT:    retq
+;
+; AVX-LABEL: int_to_float_rm_optsize:
+; AVX:       # BB#0: # %entry
+; AVX-NEXT:    vcvtsi2ssl (%rdi), %xmm0, %xmm0
+; AVX-NEXT:    retq
+;
+; SSE2_X86-LABEL: int_to_float_rm_optsize:
+; SSE2_X86:       # BB#0: # %entry
+; SSE2_X86-NEXT:    pushl %eax
+; SSE2_X86-NEXT:    .cfi_def_cfa_offset 8
+; SSE2_X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; SSE2_X86-NEXT:    cvtsi2ssl (%eax), %xmm0
+; SSE2_X86-NEXT:    movss %xmm0, (%esp)
+; SSE2_X86-NEXT:    flds (%esp)
+; SSE2_X86-NEXT:    popl %eax
+; SSE2_X86-NEXT:    .cfi_def_cfa_offset 4
+; SSE2_X86-NEXT:    retl
+;
+; AVX_X86-LABEL: int_to_float_rm_optsize:
+; AVX_X86:       # BB#0: # %entry
+; AVX_X86-NEXT:    pushl %eax
+; AVX_X86-NEXT:    .cfi_def_cfa_offset 8
 ; AVX_X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; AVX_X86-NEXT:    vcvtsi2ssl (%eax), %xmm0, %xmm0
 ; AVX_X86-NEXT:    vmovss %xmm0, (%esp)




More information about the llvm-commits mailing list