[llvm] r290060 - [X86][SSE][AVX-512] Convert FAND/FOR/FXOR/FANDN nodes to integer operations if they are available. This will allow a bunch of patterns to be removed.

Sat Dec 17 23:54:24 PST 2016

Author: ctopper
Date: Sun Dec 18 01:54:23 2016
New Revision: 290060

URL: http://llvm.org/viewvc/llvm-project?rev=290060&view=rev
Log:
[X86][SSE][AVX-512] Convert FAND/FOR/FXOR/FANDN nodes to integer operations if they are available. This will allow a bunch of patterns to be removed.

These nodes are only emitted when lowering FABS/FNEG/FNABS/FCOPYSIGN. Ideally we wouldn't create these nodes at all if SSE2 or higher is available, but it was simpler to just convert them in a DAG combine.

For SSE2, AVX, and AVX-512 with DQI, this is not a functional change, as the execution domain fixing pass ensures the right domain is selected regardless of the ISD opcode.

For AVX-512 without DQI, we end up using integer instructions since the floating-point versions aren't available. However, we were already doing that for any logical operations in code that didn't come from FABS/FNEG/FNABS/FCOPYSIGN, so this seems no worse, and we now get the benefit of being able to fold broadcasts.

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/avx512-arith.ll
    llvm/trunk/test/CodeGen/X86/pr13577.ll
    llvm/trunk/test/CodeGen/X86/vec-copysign-avx512.ll
    llvm/trunk/test/CodeGen/X86/vec_fabs.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=290060&r1=290059&r2=290060&view=diff
==============================================================================

--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sun Dec 18 01:54:23 2016
@@ -31905,23 +31905,24 @@ static SDValue combineFneg(SDNode *N, Se
 
 static SDValue lowerX86FPLogicOp(SDNode *N, SelectionDAG &DAG,
                                  const X86Subtarget &Subtarget) {
-  EVT VT = N->getValueType(0);
-  if (VT.is512BitVector() && !Subtarget.hasDQI()) {
-    // VXORPS, VORPS, VANDPS, VANDNPS are supported only under DQ extension.
-    // These logic operations may be executed in the integer domain.
+  MVT VT = N->getSimpleValueType(0);
+  // If we have integer vector types available, use the integer opcodes.
+  if (VT.isVector() && Subtarget.hasSSE2()) {
     SDLoc dl(N);
 
-    SDValue Op0 = DAG.getBitcast(MVT::v8i64, N->getOperand(0));
-    SDValue Op1 = DAG.getBitcast(MVT::v8i64, N->getOperand(1));
-    unsigned IntOpcode = 0;
+    MVT IntVT = MVT::getVectorVT(MVT::i64, VT.getSizeInBits() / 64);
+
+    SDValue Op0 = DAG.getBitcast(IntVT, N->getOperand(0));
+    SDValue Op1 = DAG.getBitcast(IntVT, N->getOperand(1));
+    unsigned IntOpcode;
     switch (N->getOpcode()) {
-      default: llvm_unreachable("Unexpected FP logic op");
-      case X86ISD::FOR: IntOpcode = ISD::OR; break;
-      case X86ISD::FXOR: IntOpcode = ISD::XOR; break;
-      case X86ISD::FAND: IntOpcode = ISD::AND; break;
-      case X86ISD::FANDN: IntOpcode = X86ISD::ANDNP; break;
+    default: llvm_unreachable("Unexpected FP logic op");
+    case X86ISD::FOR: IntOpcode = ISD::OR; break;
+    case X86ISD::FXOR: IntOpcode = ISD::XOR; break;
+    case X86ISD::FAND: IntOpcode = ISD::AND; break;
+    case X86ISD::FANDN: IntOpcode = X86ISD::ANDNP; break;
     }
-    SDValue IntOp = DAG.getNode(IntOpcode, dl, MVT::v8i64, Op0, Op1);
+    SDValue IntOp = DAG.getNode(IntOpcode, dl, IntVT, Op0, Op1);
     return DAG.getBitcast(VT, IntOp);
   }
   return SDValue();

Modified: llvm/trunk/test/CodeGen/X86/avx512-arith.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-arith.ll?rev=290060&r1=290059&r2=290060&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-arith.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-arith.ll Sun Dec 18 01:54:23 2016
@@ -1011,8 +1011,7 @@ define <8 x float>  @test_fxor_8f32(<8 x
 ;
 ; AVX512VL-LABEL: test_fxor_8f32:
 ; AVX512VL:       ## BB#0:
-; AVX512VL-NEXT:    vbroadcastss {{.*}}(%rip), %ymm1
-; AVX512VL-NEXT:    vxorps %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT:    vpxord {{.*}}(%rip){1to8}, %ymm0, %ymm0
 ; AVX512VL-NEXT:    retq
 ;
 ; AVX512BW-LABEL: test_fxor_8f32:

Modified: llvm/trunk/test/CodeGen/X86/pr13577.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pr13577.ll?rev=290060&r1=290059&r2=290060&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pr13577.ll (original)
+++ llvm/trunk/test/CodeGen/X86/pr13577.ll Sun Dec 18 01:54:23 2016
@@ -30,10 +30,9 @@ declare x86_fp80 @copysignl(x86_fp80, x8
 define float @pr26070() {
 ; CHECK-LABEL: pr26070:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    andps {{.*}}(%rip), %xmm1
 ; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
-; CHECK-NEXT:    orps %xmm1, %xmm0
+; CHECK-NEXT:    orps {{.*}}(%rip), %xmm0
 ; CHECK-NEXT:    retq
 ;
   %c = call float @copysignf(float 1.0, float undef) readnone

Modified: llvm/trunk/test/CodeGen/X86/vec-copysign-avx512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec-copysign-avx512.ll?rev=290060&r1=290059&r2=290060&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec-copysign-avx512.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec-copysign-avx512.ll Sun Dec 18 01:54:23 2016
@@ -5,11 +5,9 @@
 define <4 x float> @v4f32(<4 x float> %a, <4 x float> %b) nounwind {
 ; AVX512VL-LABEL: v4f32:
 ; AVX512VL:       ## BB#0:
-; AVX512VL-NEXT:    vbroadcastss {{.*}}(%rip), %xmm2
-; AVX512VL-NEXT:    vandps %xmm2, %xmm1, %xmm1
-; AVX512VL-NEXT:    vbroadcastss {{.*}}(%rip), %xmm2
-; AVX512VL-NEXT:    vandps %xmm2, %xmm0, %xmm0
-; AVX512VL-NEXT:    vorps %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT:    vpandd {{.*}}(%rip){1to4}, %xmm1, %xmm1
+; AVX512VL-NEXT:    vpandd {{.*}}(%rip){1to4}, %xmm0, %xmm0
+; AVX512VL-NEXT:    vporq %xmm1, %xmm0, %xmm0
 ; AVX512VL-NEXT:    retq
 ;
 ; AVX512VLDQ-LABEL: v4f32:
@@ -25,11 +23,9 @@ define <4 x float> @v4f32(<4 x float> %a
 define <8 x float> @v8f32(<8 x float> %a, <8 x float> %b) nounwind {
 ; AVX512VL-LABEL: v8f32:
 ; AVX512VL:       ## BB#0:
-; AVX512VL-NEXT:    vbroadcastss {{.*}}(%rip), %ymm2
-; AVX512VL-NEXT:    vandps %ymm2, %ymm1, %ymm1
-; AVX512VL-NEXT:    vbroadcastss {{.*}}(%rip), %ymm2
-; AVX512VL-NEXT:    vandps %ymm2, %ymm0, %ymm0
-; AVX512VL-NEXT:    vorps %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT:    vpandd {{.*}}(%rip){1to8}, %ymm1, %ymm1
+; AVX512VL-NEXT:    vpandd {{.*}}(%rip){1to8}, %ymm0, %ymm0
+; AVX512VL-NEXT:    vporq %ymm1, %ymm0, %ymm0
 ; AVX512VL-NEXT:    retq
 ;
 ; AVX512VLDQ-LABEL: v8f32:
@@ -61,12 +57,19 @@ define <16 x float> @v16f32(<16 x float>
 }
 
 define <2 x double> @v2f64(<2 x double> %a, <2 x double> %b) nounwind {
-; CHECK-LABEL: v2f64:
-; CHECK:       ## BB#0:
-; CHECK-NEXT:    vandps {{.*}}(%rip), %xmm1, %xmm1
-; CHECK-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
-; CHECK-NEXT:    vorps %xmm1, %xmm0, %xmm0
-; CHECK-NEXT:    retq
+; AVX512VL-LABEL: v2f64:
+; AVX512VL:       ## BB#0:
+; AVX512VL-NEXT:    vpandq {{.*}}(%rip), %xmm1, %xmm1
+; AVX512VL-NEXT:    vpandq {{.*}}(%rip), %xmm0, %xmm0
+; AVX512VL-NEXT:    vporq %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT:    retq
+;
+; AVX512VLDQ-LABEL: v2f64:
+; AVX512VLDQ:       ## BB#0:
+; AVX512VLDQ-NEXT:    vandps {{.*}}(%rip), %xmm1, %xmm1
+; AVX512VLDQ-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
+; AVX512VLDQ-NEXT:    vorps %xmm1, %xmm0, %xmm0
+; AVX512VLDQ-NEXT:    retq
   %tmp = tail call <2 x double> @llvm.copysign.v2f64( <2 x double> %a, <2 x double> %b )
   ret <2 x double> %tmp
 }
@@ -74,11 +77,9 @@ define <2 x double> @v2f64(<2 x double>
 define <4 x double> @v4f64(<4 x double> %a, <4 x double> %b) nounwind {
 ; AVX512VL-LABEL: v4f64:
 ; AVX512VL:       ## BB#0:
-; AVX512VL-NEXT:    vbroadcastsd {{.*}}(%rip), %ymm2
-; AVX512VL-NEXT:    vandps %ymm2, %ymm1, %ymm1
-; AVX512VL-NEXT:    vbroadcastsd {{.*}}(%rip), %ymm2
-; AVX512VL-NEXT:    vandps %ymm2, %ymm0, %ymm0
-; AVX512VL-NEXT:    vorps %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT:    vpandq {{.*}}(%rip){1to4}, %ymm1, %ymm1
+; AVX512VL-NEXT:    vpandq {{.*}}(%rip){1to4}, %ymm0, %ymm0
+; AVX512VL-NEXT:    vporq %ymm1, %ymm0, %ymm0
 ; AVX512VL-NEXT:    retq
 ;
 ; AVX512VLDQ-LABEL: v4f64:

Modified: llvm/trunk/test/CodeGen/X86/vec_fabs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_fabs.ll?rev=290060&r1=290059&r2=290060&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_fabs.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_fabs.ll Sun Dec 18 01:54:23 2016
@@ -10,15 +10,35 @@
 ; 2013.
 
 define <2 x double> @fabs_v2f64(<2 x double> %p) {
-; X32-LABEL: fabs_v2f64:
-; X32:       # BB#0:
-; X32-NEXT:    vandps {{\.LCPI.*}}, %xmm0, %xmm0
-; X32-NEXT:    retl
-;
-; X64-LABEL: fabs_v2f64:
-; X64:       # BB#0:
-; X64-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
-; X64-NEXT:    retq
+; X32_AVX-LABEL: fabs_v2f64:
+; X32_AVX:       # BB#0:
+; X32_AVX-NEXT:    vandps {{\.LCPI.*}}, %xmm0, %xmm0
+; X32_AVX-NEXT:    retl
+;
+; X32_AVX512VL-LABEL: fabs_v2f64:
+; X32_AVX512VL:       # BB#0:
+; X32_AVX512VL-NEXT:    vpandq {{\.LCPI.*}}, %xmm0, %xmm0
+; X32_AVX512VL-NEXT:    retl
+;
+; X32_AVX512VLDQ-LABEL: fabs_v2f64:
+; X32_AVX512VLDQ:       # BB#0:
+; X32_AVX512VLDQ-NEXT:    vandps {{\.LCPI.*}}, %xmm0, %xmm0
+; X32_AVX512VLDQ-NEXT:    retl
+;
+; X64_AVX-LABEL: fabs_v2f64:
+; X64_AVX:       # BB#0:
+; X64_AVX-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
+; X64_AVX-NEXT:    retq
+;
+; X64_AVX512VL-LABEL: fabs_v2f64:
+; X64_AVX512VL:       # BB#0:
+; X64_AVX512VL-NEXT:    vpandq {{.*}}(%rip), %xmm0, %xmm0
+; X64_AVX512VL-NEXT:    retq
+;
+; X64_AVX512VLDQ-LABEL: fabs_v2f64:
+; X64_AVX512VLDQ:       # BB#0:
+; X64_AVX512VLDQ-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
+; X64_AVX512VLDQ-NEXT:    retq
   %t = call <2 x double> @llvm.fabs.v2f64(<2 x double> %p)
   ret <2 x double> %t
 }
@@ -32,8 +52,7 @@ define <4 x float> @fabs_v4f32(<4 x floa
 ;
 ; X32_AVX512VL-LABEL: fabs_v4f32:
 ; X32_AVX512VL:       # BB#0:
-; X32_AVX512VL-NEXT:    vbroadcastss {{\.LCPI.*}}, %xmm1
-; X32_AVX512VL-NEXT:    vandps %xmm1, %xmm0, %xmm0
+; X32_AVX512VL-NEXT:    vpandd {{\.LCPI.*}}{1to4}, %xmm0, %xmm0
 ; X32_AVX512VL-NEXT:    retl
 ;
 ; X32_AVX512VLDQ-LABEL: fabs_v4f32:
@@ -48,8 +67,7 @@ define <4 x float> @fabs_v4f32(<4 x floa
 ;
 ; X64_AVX512VL-LABEL: fabs_v4f32:
 ; X64_AVX512VL:       # BB#0:
-; X64_AVX512VL-NEXT:    vbroadcastss {{.*}}(%rip), %xmm1
-; X64_AVX512VL-NEXT:    vandps %xmm1, %xmm0, %xmm0
+; X64_AVX512VL-NEXT:    vpandd {{.*}}(%rip){1to4}, %xmm0, %xmm0
 ; X64_AVX512VL-NEXT:    retq
 ;
 ; X64_AVX512VLDQ-LABEL: fabs_v4f32:
@@ -69,8 +87,7 @@ define <4 x double> @fabs_v4f64(<4 x dou
 ;
 ; X32_AVX512VL-LABEL: fabs_v4f64:
 ; X32_AVX512VL:       # BB#0:
-; X32_AVX512VL-NEXT:    vbroadcastsd {{\.LCPI.*}}, %ymm1
-; X32_AVX512VL-NEXT:    vandps %ymm1, %ymm0, %ymm0
+; X32_AVX512VL-NEXT:    vpandq {{\.LCPI.*}}{1to4}, %ymm0, %ymm0
 ; X32_AVX512VL-NEXT:    retl
 ;
 ; X32_AVX512VLDQ-LABEL: fabs_v4f64:
@@ -85,8 +102,7 @@ define <4 x double> @fabs_v4f64(<4 x dou
 ;
 ; X64_AVX512VL-LABEL: fabs_v4f64:
 ; X64_AVX512VL:       # BB#0:
-; X64_AVX512VL-NEXT:    vbroadcastsd {{.*}}(%rip), %ymm1
-; X64_AVX512VL-NEXT:    vandps %ymm1, %ymm0, %ymm0
+; X64_AVX512VL-NEXT:    vpandq {{.*}}(%rip){1to4}, %ymm0, %ymm0
 ; X64_AVX512VL-NEXT:    retq
 ;
 ; X64_AVX512VLDQ-LABEL: fabs_v4f64:
@@ -106,8 +122,7 @@ define <8 x float> @fabs_v8f32(<8 x floa
 ;
 ; X32_AVX512VL-LABEL: fabs_v8f32:
 ; X32_AVX512VL:       # BB#0:
-; X32_AVX512VL-NEXT:    vbroadcastss {{\.LCPI.*}}, %ymm1
-; X32_AVX512VL-NEXT:    vandps %ymm1, %ymm0, %ymm0
+; X32_AVX512VL-NEXT:    vpandd {{\.LCPI.*}}{1to8}, %ymm0, %ymm0
 ; X32_AVX512VL-NEXT:    retl
 ;
 ; X32_AVX512VLDQ-LABEL: fabs_v8f32:
@@ -122,8 +137,7 @@ define <8 x float> @fabs_v8f32(<8 x floa
 ;
 ; X64_AVX512VL-LABEL: fabs_v8f32:
 ; X64_AVX512VL:       # BB#0:
-; X64_AVX512VL-NEXT:    vbroadcastss {{.*}}(%rip), %ymm1
-; X64_AVX512VL-NEXT:    vandps %ymm1, %ymm0, %ymm0
+; X64_AVX512VL-NEXT:    vpandd {{.*}}(%rip){1to8}, %ymm0, %ymm0
 ; X64_AVX512VL-NEXT:    retq
 ;
 ; X64_AVX512VLDQ-LABEL: fabs_v8f32: