[llvm] r347526 - [x86] limit transform for select-of-fp-constants

Sun Nov 25 09:27:03 PST 2018

Author: spatel
Date: Sun Nov 25 09:27:02 2018
New Revision: 347526

URL: http://llvm.org/viewvc/llvm-project?rev=347526&view=rev
Log:
[x86] limit transform for select-of-fp-constants

This should likely be adjusted to limit this transform
further, but these diffs should be clear wins.

If we have blendv/conditional move, then we should assume
those are cheap ops. The loads become independent of the
compare, so they can be speculated before we need to use
the values in the blend/mov.

Modified:
    llvm/trunk/include/llvm/CodeGen/TargetLowering.h
    llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/lib/Target/X86/X86ISelLowering.h
    llvm/trunk/test/CodeGen/X86/avx512-cmp.ll
    llvm/trunk/test/CodeGen/X86/avx512-schedule.ll
    llvm/trunk/test/CodeGen/X86/select-of-fp-constants.ll
    llvm/trunk/test/CodeGen/X86/vselect-zero.ll

Modified: llvm/trunk/include/llvm/CodeGen/TargetLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/TargetLowering.h?rev=347526&r1=347525&r2=347526&view=diff
==============================================================================

--- llvm/trunk/include/llvm/CodeGen/TargetLowering.h (original)
+++ llvm/trunk/include/llvm/CodeGen/TargetLowering.h Sun Nov 25 09:27:02 2018
@@ -269,6 +269,14 @@ public:
     return true;
   }
 
+  /// Return true if it is profitable to convert a select of FP constants into
+  /// a constant pool load whose address depends on the select condition.
+  /// \p IsFPSetCC lets a target differentiate a select with an FP compare
+  /// from one with an integer compare. The default implementation returns true.
+  virtual bool reduceSelectOfFPConstantLoads(bool IsFPSetCC) const {
+    return true;
+  }
+
   /// Return true if multiple condition registers are available.
   bool hasMultipleConditionRegisters() const {
     return HasMultipleConditionRegisters;

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=347526&r1=347525&r2=347526&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Sun Nov 25 09:27:02 2018
@@ -18129,6 +18129,9 @@ SDValue DAGCombiner::foldSelectCCToShift
 SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
     const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
     ISD::CondCode CC) {
+  if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType().isFloatingPoint()))
+    return SDValue();
+
   // If we are before legalize types, we want the other legalization to happen
   // first (for example, to avoid messing with soft float).
   auto *TV = dyn_cast<ConstantFPSDNode>(N2);

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=347526&r1=347525&r2=347526&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sun Nov 25 09:27:02 2018
@@ -4767,6 +4767,14 @@ bool X86TargetLowering::shouldConvertCon
   return true;
 }
 
+bool X86TargetLowering::reduceSelectOfFPConstantLoads(bool IsFPSetCC) const {
+  // Returns false (keep the select) only when the condition is an FP compare,
+  // the target is 64-bit LP64, and AVX is available. Those checks stand for:
+  // XMM registers are used in the ABI and blendv/conditional move exists, so
+  // selecting is cheaper than a cross-register move followed by a load that
+  // depends on the compare result. In all other cases this returns true.
+  return !IsFPSetCC || !Subtarget.isTarget64BitLP64() || !Subtarget.hasAVX();
+}
+
 bool X86TargetLowering::convertSelectOfConstantsToMath(EVT VT) const {
   // TODO: It might be a win to ease or lift this restriction, but the generic
   // folds in DAGCombiner conflict with vector folds for an AVX512 target.

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=347526&r1=347525&r2=347526&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Sun Nov 25 09:27:02 2018
@@ -1041,6 +1041,8 @@ namespace llvm {
     bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                            Type *Ty) const override;
 
+    bool reduceSelectOfFPConstantLoads(bool IsFPSetCC) const override;
+
     bool convertSelectOfConstantsToMath(EVT VT) const override;
 
     bool decomposeMulByConstant(EVT VT, SDValue C) const override;

Modified: llvm/trunk/test/CodeGen/X86/avx512-cmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-cmp.ll?rev=347526&r1=347525&r2=347526&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-cmp.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-cmp.ll Sun Nov 25 09:27:02 2018
@@ -69,10 +69,10 @@ define float @test5(float %p) #0 {
 ; ALL-NEXT:  ## %bb.2: ## %return
 ; ALL-NEXT:    retq
 ; ALL-NEXT:  LBB3_1: ## %if.end
-; ALL-NEXT:    seta %al
-; ALL-NEXT:    movzbl %al, %eax
-; ALL-NEXT:    leaq {{.*}}(%rip), %rcx
+; ALL-NEXT:    vcmpltss %xmm0, %xmm1, %k1
+; ALL-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; ALL-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; ALL-NEXT:    vmovss %xmm1, %xmm0, %xmm0 {%k1}
 ; ALL-NEXT:    retq
 entry:
   %cmp = fcmp oeq float %p, 0.000000e+00

Modified: llvm/trunk/test/CodeGen/X86/avx512-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-schedule.ll?rev=347526&r1=347525&r2=347526&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-schedule.ll Sun Nov 25 09:27:02 2018
@@ -1148,9 +1148,10 @@ define float @test5(float %p) #0 {
 ; GENERIC-NEXT:  # %bb.2: # %return
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ; GENERIC-NEXT:  .LBB67_1: # %if.end
-; GENERIC-NEXT:    seta %al # sched: [2:1.00]
-; GENERIC-NEXT:    movzbl %al, %eax # sched: [1:0.33]
+; GENERIC-NEXT:    vcmpltss %xmm0, %xmm1, %k1 # sched: [3:1.00]
+; GENERIC-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50]
 ; GENERIC-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50]
+; GENERIC-NEXT:    vmovss %xmm1, %xmm0, %xmm0 {%k1} # sched: [1:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: test5:
@@ -1162,9 +1163,10 @@ define float @test5(float %p) #0 {
 ; SKX-NEXT:  # %bb.2: # %return
 ; SKX-NEXT:    retq # sched: [7:1.00]
 ; SKX-NEXT:  .LBB67_1: # %if.end
-; SKX-NEXT:    seta %al # sched: [2:1.00]
-; SKX-NEXT:    movzbl %al, %eax # sched: [1:0.25]
+; SKX-NEXT:    vcmpltss %xmm0, %xmm1, %k1 # sched: [3:1.00]
+; SKX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
 ; SKX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
+; SKX-NEXT:    vmovss %xmm1, %xmm0, %xmm0 {%k1} # sched: [1:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
 entry:
   %cmp = fcmp oeq float %p, 0.000000e+00

Modified: llvm/trunk/test/CodeGen/X86/select-of-fp-constants.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/select-of-fp-constants.ll?rev=347526&r1=347525&r2=347526&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/select-of-fp-constants.ll (original)
+++ llvm/trunk/test/CodeGen/X86/select-of-fp-constants.ll Sun Nov 25 09:27:02 2018
@@ -76,18 +76,18 @@ define float @fcmp_select_fp_constants(f
 ;
 ; X64_AVX2-LABEL: fcmp_select_fp_constants:
 ; X64_AVX2:       # %bb.0:
+; X64_AVX2-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X64_AVX2-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
 ; X64_AVX2-NEXT:    vcmpneqss {{.*}}(%rip), %xmm0, %xmm0
-; X64_AVX2-NEXT:    vmovd %xmm0, %eax
-; X64_AVX2-NEXT:    andl $1, %eax
-; X64_AVX2-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64_AVX2-NEXT:    vblendvps %xmm0, %xmm1, %xmm2, %xmm0
 ; X64_AVX2-NEXT:    retq
 ;
 ; X64_AVX512F-LABEL: fcmp_select_fp_constants:
 ; X64_AVX512F:       # %bb.0:
-; X64_AVX512F-NEXT:    vcmpneqss {{.*}}(%rip), %xmm0, %k0
-; X64_AVX512F-NEXT:    kmovw %k0, %eax
-; X64_AVX512F-NEXT:    movzwl %ax, %eax
+; X64_AVX512F-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X64_AVX512F-NEXT:    vcmpneqss {{.*}}(%rip), %xmm0, %k1
 ; X64_AVX512F-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64_AVX512F-NEXT:    vmovss %xmm1, %xmm0, %xmm0 {%k1}
 ; X64_AVX512F-NEXT:    retq
  %c = fcmp une float %x, -4.0
  %r = select i1 %c, float 42.0, float 23.0

Modified: llvm/trunk/test/CodeGen/X86/vselect-zero.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vselect-zero.ll?rev=347526&r1=347525&r2=347526&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vselect-zero.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vselect-zero.ll Sun Nov 25 09:27:02 2018
@@ -129,9 +129,9 @@ define double @fsel_nonzero_constants(do
 ; AVX-LABEL: fsel_nonzero_constants:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vcmpeqsd %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vmovq %xmm0, %rax
-; AVX-NEXT:    andl $1, %eax
-; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
+; AVX-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
+; AVX-NEXT:    vblendvpd %xmm0, %xmm1, %xmm2, %xmm0
 ; AVX-NEXT:    retq
   %cond = fcmp oeq double %x, %y
   %r = select i1 %cond, double 12.0, double 42.0