[llvm] r254913 - AVX-512: Fixed a bug in FP logic operation lowering
Elena Demikhovsky via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 7 06:33:34 PST 2015
Author: delena
Date: Mon Dec 7 08:33:34 2015
New Revision: 254913
URL: http://llvm.org/viewvc/llvm-project?rev=254913&view=rev
Log:
AVX-512: Fixed a bug in FP logic operation lowering
512-bit FP logic instructions are supported only with the DQ extension on AVX-512 targets.
When DQ is not available, I use integer operations instead.
Added tests.
I also enabled FABS in this patch in order to check ANDPS.
The operations are FOR, FXOR, FAND, FANDN.
The instructions that are supported for 512-bit vectors under DQ are:
VORPS/PD, VXORPS/PD, VANDPS/PD, VANDNPS/PD.
Differential Revision: http://reviews.llvm.org/D15110
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/lib/Target/X86/X86InstrInfo.td
llvm/trunk/lib/Target/X86/X86InstrSSE.td
llvm/trunk/test/CodeGen/X86/avx-logic.ll
llvm/trunk/test/CodeGen/X86/avx512-arith.ll
llvm/trunk/test/CodeGen/X86/vec_fabs.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=254913&r1=254912&r2=254913&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Mon Dec 7 08:33:34 2015
@@ -1340,6 +1340,7 @@ X86TargetLowering::X86TargetLowering(con
setOperationAction(ISD::FDIV, MVT::v16f32, Legal);
setOperationAction(ISD::FSQRT, MVT::v16f32, Legal);
setOperationAction(ISD::FNEG, MVT::v16f32, Custom);
+ setOperationAction(ISD::FABS, MVT::v16f32, Custom);
setOperationAction(ISD::FADD, MVT::v8f64, Legal);
setOperationAction(ISD::FSUB, MVT::v8f64, Legal);
@@ -1347,6 +1348,7 @@ X86TargetLowering::X86TargetLowering(con
setOperationAction(ISD::FDIV, MVT::v8f64, Legal);
setOperationAction(ISD::FSQRT, MVT::v8f64, Legal);
setOperationAction(ISD::FNEG, MVT::v8f64, Custom);
+ setOperationAction(ISD::FABS, MVT::v8f64, Custom);
setOperationAction(ISD::FMA, MVT::v8f64, Legal);
setOperationAction(ISD::FMA, MVT::v16f32, Legal);
@@ -26339,6 +26341,31 @@ static SDValue PerformFNEGCombine(SDNode
return SDValue();
}
+static SDValue lowerX86FPLogicOp(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ EVT VT = N->getValueType(0);
+ if (VT.is512BitVector() && !Subtarget->hasDQI()) {
+ // VXORPS, VORPS, VANDPS, VANDNPS are supported only under DQ extention.
+ // These logic operations may be executed in the integer domain.
+ SDLoc dl(N);
+ MVT IntScalar = MVT::getIntegerVT(VT.getScalarSizeInBits());
+ MVT IntVT = MVT::getVectorVT(IntScalar, VT.getVectorNumElements());
+
+ SDValue Op0 = DAG.getNode(ISD::BITCAST, dl, IntVT, N->getOperand(0));
+ SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, IntVT, N->getOperand(1));
+ unsigned IntOpcode = 0;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("Unexpected FP logic op");
+ case X86ISD::FOR: IntOpcode = ISD::OR; break;
+ case X86ISD::FXOR: IntOpcode = ISD::XOR; break;
+ case X86ISD::FAND: IntOpcode = ISD::AND; break;
+ case X86ISD::FANDN: IntOpcode = X86ISD::ANDNP; break;
+ }
+ SDValue IntOp = DAG.getNode(IntOpcode, dl, IntVT, Op0, Op1);
+ return DAG.getNode(ISD::BITCAST, dl, VT, IntOp);
+ }
+ return SDValue();
+}
/// Do target-specific dag combines on X86ISD::FOR and X86ISD::FXOR nodes.
static SDValue PerformFORCombine(SDNode *N, SelectionDAG &DAG,
const X86Subtarget *Subtarget) {
@@ -26354,19 +26381,7 @@ static SDValue PerformFORCombine(SDNode
if (C->getValueAPF().isPosZero())
return N->getOperand(0);
- EVT VT = N->getValueType(0);
- if (VT.is512BitVector() && !Subtarget->hasDQI()) {
- SDLoc dl(N);
- MVT IntScalar = MVT::getIntegerVT(VT.getScalarSizeInBits());
- MVT IntVT = MVT::getVectorVT(IntScalar, VT.getVectorNumElements());
-
- SDValue Op0 = DAG.getNode(ISD::BITCAST, dl, IntVT, N->getOperand(0));
- SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, IntVT, N->getOperand(1));
- unsigned IntOpcode = (N->getOpcode() == X86ISD::FOR) ? ISD::OR : ISD::XOR;
- SDValue IntOp = DAG.getNode(IntOpcode, dl, IntVT, Op0, Op1);
- return DAG.getNode(ISD::BITCAST, dl, VT, IntOp);
- }
- return SDValue();
+ return lowerX86FPLogicOp(N, DAG, Subtarget);
}
/// Do target-specific dag combines on X86ISD::FMIN and X86ISD::FMAX nodes.
@@ -26391,7 +26406,8 @@ static SDValue PerformFMinFMaxCombine(SD
}
/// Do target-specific dag combines on X86ISD::FAND nodes.
-static SDValue PerformFANDCombine(SDNode *N, SelectionDAG &DAG) {
+static SDValue PerformFANDCombine(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
// FAND(0.0, x) -> 0.0
if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
if (C->getValueAPF().isPosZero())
@@ -26402,11 +26418,12 @@ static SDValue PerformFANDCombine(SDNode
if (C->getValueAPF().isPosZero())
return N->getOperand(1);
- return SDValue();
+ return lowerX86FPLogicOp(N, DAG, Subtarget);
}
/// Do target-specific dag combines on X86ISD::FANDN nodes
-static SDValue PerformFANDNCombine(SDNode *N, SelectionDAG &DAG) {
+static SDValue PerformFANDNCombine(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
// FANDN(0.0, x) -> x
if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
if (C->getValueAPF().isPosZero())
@@ -26417,7 +26434,7 @@ static SDValue PerformFANDNCombine(SDNod
if (C->getValueAPF().isPosZero())
return N->getOperand(1);
- return SDValue();
+ return lowerX86FPLogicOp(N, DAG, Subtarget);
}
static SDValue PerformBTCombine(SDNode *N,
@@ -27233,8 +27250,8 @@ SDValue X86TargetLowering::PerformDAGCom
case X86ISD::FOR: return PerformFORCombine(N, DAG, Subtarget);
case X86ISD::FMIN:
case X86ISD::FMAX: return PerformFMinFMaxCombine(N, DAG);
- case X86ISD::FAND: return PerformFANDCombine(N, DAG);
- case X86ISD::FANDN: return PerformFANDNCombine(N, DAG);
+ case X86ISD::FAND: return PerformFANDCombine(N, DAG, Subtarget);
+ case X86ISD::FANDN: return PerformFANDNCombine(N, DAG, Subtarget);
case X86ISD::BT: return PerformBTCombine(N, DAG, DCI);
case X86ISD::VZEXT_MOVL: return PerformVZEXT_MOVLCombine(N, DAG);
case ISD::ANY_EXTEND:
Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.td?rev=254913&r1=254912&r2=254913&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.td Mon Dec 7 08:33:34 2015
@@ -770,6 +770,7 @@ def HasVLX : Predicate<"Subtarget-
AssemblerPredicate<"FeatureVLX", "AVX-512 VL ISA">;
def NoVLX : Predicate<"!Subtarget->hasVLX()">;
def NoVLX_Or_NoBWI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasBWI()">;
+def NoVLX_Or_NoDQI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasDQI()">;
def HasPOPCNT : Predicate<"Subtarget->hasPOPCNT()">;
def HasAES : Predicate<"Subtarget->hasAES()">;
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=254913&r1=254912&r2=254913&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Mon Dec 7 08:33:34 2015
@@ -2906,7 +2906,7 @@ let isCodeGenOnly = 1 in {
// Multiclass for vectors using the X86 logical operation aliases for FP.
multiclass sse12_fp_packed_vector_logical_alias<
bits<8> opc, string OpcodeStr, SDNode OpNode, OpndItins itins> {
- let Predicates = [HasAVX, NoVLX] in {
+ let Predicates = [HasAVX, NoVLX_Or_NoDQI] in {
defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,
VR128, v4f32, f128mem, loadv4f32, SSEPackedSingle, itins, 0>,
PS, VEX_4V;
Modified: llvm/trunk/test/CodeGen/X86/avx-logic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-logic.ll?rev=254913&r1=254912&r2=254913&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-logic.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-logic.ll Mon Dec 7 08:33:34 2015
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s
define <4 x double> @andpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
; CHECK-LABEL: andpd256:
Modified: llvm/trunk/test/CodeGen/X86/avx512-arith.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-arith.ll?rev=254913&r1=254912&r2=254913&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-arith.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-arith.ll Mon Dec 7 08:33:34 2015
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck --check-prefix=CHECK --check-prefix=AVX512F %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512vl | FileCheck --check-prefix=CHECK --check-prefix=AVX512VL %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512bw | FileCheck --check-prefix=CHECK --check-prefix=AVX512BW %s
@@ -823,3 +824,73 @@ define <16 x float> @test_fxor(<16 x fl
ret <16 x float>%res
}
+define <8 x float> @test_fxor_8f32(<8 x float> %a) {
+; CHECK-LABEL: test_fxor_8f32:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vxorps {{.*}}(%rip), %ymm0, %ymm0
+; CHECK-NEXT: retq
+ %res = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
+ ret <8 x float>%res
+}
+
+define <8 x double> @fabs_v8f64(<8 x double> %p)
+; AVX512F-LABEL: fabs_v8f64:
+; AVX512F: ## BB#0:
+; AVX512F-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: fabs_v8f64:
+; AVX512VL: ## BB#0:
+; AVX512VL-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
+; AVX512VL-NEXT: retq
+;
+; AVX512BW-LABEL: fabs_v8f64:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512DQ-LABEL: fabs_v8f64:
+; AVX512DQ: ## BB#0:
+; AVX512DQ-NEXT: vandpd {{.*}}(%rip), %zmm0, %zmm0
+; AVX512DQ-NEXT: retq
+;
+; SKX-LABEL: fabs_v8f64:
+; SKX: ## BB#0:
+; SKX-NEXT: vandpd {{.*}}(%rip), %zmm0, %zmm0
+; SKX-NEXT: retq
+{
+ %t = call <8 x double> @llvm.fabs.v8f64(<8 x double> %p)
+ ret <8 x double> %t
+}
+declare <8 x double> @llvm.fabs.v8f64(<8 x double> %p)
+
+define <16 x float> @fabs_v16f32(<16 x float> %p)
+; AVX512F-LABEL: fabs_v16f32:
+; AVX512F: ## BB#0:
+; AVX512F-NEXT: vpandd {{.*}}(%rip), %zmm0, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: fabs_v16f32:
+; AVX512VL: ## BB#0:
+; AVX512VL-NEXT: vpandd {{.*}}(%rip), %zmm0, %zmm0
+; AVX512VL-NEXT: retq
+;
+; AVX512BW-LABEL: fabs_v16f32:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vpandd {{.*}}(%rip), %zmm0, %zmm0
+; AVX512BW-NEXT: retq
+;
+; AVX512DQ-LABEL: fabs_v16f32:
+; AVX512DQ: ## BB#0:
+; AVX512DQ-NEXT: vandps {{.*}}(%rip), %zmm0, %zmm0
+; AVX512DQ-NEXT: retq
+;
+; SKX-LABEL: fabs_v16f32:
+; SKX: ## BB#0:
+; SKX-NEXT: vandps {{.*}}(%rip), %zmm0, %zmm0
+; SKX-NEXT: retq
+{
+ %t = call <16 x float> @llvm.fabs.v16f32(<16 x float> %p)
+ ret <16 x float> %t
+}
+declare <16 x float> @llvm.fabs.v16f32(<16 x float> %p)
Modified: llvm/trunk/test/CodeGen/X86/vec_fabs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_fabs.ll?rev=254913&r1=254912&r2=254913&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_fabs.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_fabs.ll Mon Dec 7 08:33:34 2015
@@ -1,5 +1,5 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s
-
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f | FileCheck %s
define <2 x double> @fabs_v2f64(<2 x double> %p)
{
More information about the llvm-commits
mailing list