[llvm] r265266 - [X86][SSE] Support for MOVMSK signbit extraction instructions
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sun Apr 3 11:22:03 PDT 2016
Author: rksimon
Date: Sun Apr 3 13:22:03 2016
New Revision: 265266
URL: http://llvm.org/viewvc/llvm-project?rev=265266&view=rev
Log:
[X86][SSE] Support for MOVMSK signbit extraction instructions
Add support for lowering with the MOVMSK instruction to extract vector element signbits to a GPR.
This is an early step towards better handling of vector comparison results.
Differential Revision: http://reviews.llvm.org/D18741
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/lib/Target/X86/X86ISelLowering.h
llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
llvm/trunk/lib/Target/X86/X86InstrSSE.td
llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=265266&r1=265265&r2=265266&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sun Apr 3 13:22:03 2016
@@ -21888,6 +21888,7 @@ const char *X86TargetLowering::getTarget
case X86ISD::AND: return "X86ISD::AND";
case X86ISD::BEXTR: return "X86ISD::BEXTR";
case X86ISD::MUL_IMM: return "X86ISD::MUL_IMM";
+ case X86ISD::MOVMSK: return "X86ISD::MOVMSK";
case X86ISD::PTEST: return "X86ISD::PTEST";
case X86ISD::TESTP: return "X86ISD::TESTP";
case X86ISD::TESTM: return "X86ISD::TESTM";
@@ -24018,33 +24019,9 @@ void X86TargetLowering::computeKnownBits
case X86ISD::SETCC:
KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
break;
- case ISD::INTRINSIC_WO_CHAIN: {
- unsigned IntId = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
- unsigned NumLoBits = 0;
- switch (IntId) {
- default: break;
- case Intrinsic::x86_sse_movmsk_ps:
- case Intrinsic::x86_avx_movmsk_ps_256:
- case Intrinsic::x86_sse2_movmsk_pd:
- case Intrinsic::x86_avx_movmsk_pd_256:
- case Intrinsic::x86_mmx_pmovmskb:
- case Intrinsic::x86_sse2_pmovmskb_128:
- case Intrinsic::x86_avx2_pmovmskb: {
- // High bits of movmskp{s|d}, pmovmskb are known zero.
- switch (IntId) {
- default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
- case Intrinsic::x86_sse_movmsk_ps: NumLoBits = 4; break;
- case Intrinsic::x86_avx_movmsk_ps_256: NumLoBits = 8; break;
- case Intrinsic::x86_sse2_movmsk_pd: NumLoBits = 2; break;
- case Intrinsic::x86_avx_movmsk_pd_256: NumLoBits = 4; break;
- case Intrinsic::x86_mmx_pmovmskb: NumLoBits = 8; break;
- case Intrinsic::x86_sse2_pmovmskb_128: NumLoBits = 16; break;
- case Intrinsic::x86_avx2_pmovmskb: NumLoBits = 32; break;
- }
- KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - NumLoBits);
- break;
- }
- }
+ case X86ISD::MOVMSK: {
+ unsigned NumLoBits = Op.getOperand(0).getValueType().getVectorNumElements();
+ KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - NumLoBits);
break;
}
}
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=265266&r1=265265&r2=265266&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Sun Apr 3 13:22:03 2016
@@ -352,6 +352,9 @@ namespace llvm {
// X86-specific multiply by immediate.
MUL_IMM,
+ // Vector sign bit extraction.
+ MOVMSK,
+
// Vector bitwise comparisons.
PTEST,
Modified: llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td?rev=265266&r1=265265&r2=265266&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td Sun Apr 3 13:22:03 2016
@@ -276,6 +276,9 @@ def X86ktest : SDNode<"X86ISD::KTEST",
def X86testm : SDNode<"X86ISD::TESTM", SDTX86Testm, [SDNPCommutative]>;
def X86testnm : SDNode<"X86ISD::TESTNM", SDTX86Testm, [SDNPCommutative]>;
+def X86movmsk : SDNode<"X86ISD::MOVMSK",
+ SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVec<1>]>>;
+
def X86select : SDNode<"X86ISD::SELECT" , SDTSelect>;
def X86pmuludq : SDNode<"X86ISD::PMULUDQ",
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=265266&r1=265265&r2=265266&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Sun Apr 3 13:22:03 2016
@@ -2765,25 +2765,23 @@ let Predicates = [HasAVX1Only] in {
//===----------------------------------------------------------------------===//
/// sse12_extr_sign_mask - sse 1 & 2 unpack and interleave
-multiclass sse12_extr_sign_mask<RegisterClass RC, Intrinsic Int, string asm,
- Domain d> {
+multiclass sse12_extr_sign_mask<RegisterClass RC, ValueType vt,
+ string asm, Domain d> {
def rr : PI<0x50, MRMSrcReg, (outs GR32orGR64:$dst), (ins RC:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"),
- [(set GR32orGR64:$dst, (Int RC:$src))], IIC_SSE_MOVMSK, d>,
+ [(set GR32orGR64:$dst, (X86movmsk (vt RC:$src)))], IIC_SSE_MOVMSK, d>,
Sched<[WriteVecLogic]>;
}
let Predicates = [HasAVX] in {
- defm VMOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps,
- "movmskps", SSEPackedSingle>, PS, VEX;
- defm VMOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd,
- "movmskpd", SSEPackedDouble>, PD, VEX;
- defm VMOVMSKPSY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_ps_256,
- "movmskps", SSEPackedSingle>, PS,
- VEX, VEX_L;
- defm VMOVMSKPDY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_pd_256,
- "movmskpd", SSEPackedDouble>, PD,
- VEX, VEX_L;
+ defm VMOVMSKPS : sse12_extr_sign_mask<VR128, v4f32, "movmskps",
+ SSEPackedSingle>, PS, VEX;
+ defm VMOVMSKPD : sse12_extr_sign_mask<VR128, v2f64, "movmskpd",
+ SSEPackedDouble>, PD, VEX;
+ defm VMOVMSKPSY : sse12_extr_sign_mask<VR256, v8f32, "movmskps",
+ SSEPackedSingle>, PS, VEX, VEX_L;
+ defm VMOVMSKPDY : sse12_extr_sign_mask<VR256, v4f64, "movmskpd",
+ SSEPackedDouble>, PD, VEX, VEX_L;
def : Pat<(i32 (X86fgetsign FR32:$src)),
(VMOVMSKPSrr (COPY_TO_REGCLASS FR32:$src, VR128))>;
@@ -2797,9 +2795,9 @@ let Predicates = [HasAVX] in {
(VMOVMSKPDrr (COPY_TO_REGCLASS FR64:$src, VR128)), sub_32bit)>;
}
-defm MOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps, "movmskps",
+defm MOVMSKPS : sse12_extr_sign_mask<VR128, v4f32, "movmskps",
SSEPackedSingle>, PS;
-defm MOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd, "movmskpd",
+defm MOVMSKPD : sse12_extr_sign_mask<VR128, v2f64, "movmskpd",
SSEPackedDouble>, PD;
def : Pat<(i32 (X86fgetsign FR32:$src)),
@@ -4665,20 +4663,20 @@ let ExeDomain = SSEPackedInt, SchedRW =
def VPMOVMSKBrr : VPDI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst),
(ins VR128:$src),
"pmovmskb\t{$src, $dst|$dst, $src}",
- [(set GR32orGR64:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))],
+ [(set GR32orGR64:$dst, (X86movmsk (v16i8 VR128:$src)))],
IIC_SSE_MOVMSK>, VEX;
let Predicates = [HasAVX2] in {
def VPMOVMSKBYrr : VPDI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst),
(ins VR256:$src),
"pmovmskb\t{$src, $dst|$dst, $src}",
- [(set GR32orGR64:$dst, (int_x86_avx2_pmovmskb VR256:$src))]>,
+ [(set GR32orGR64:$dst, (X86movmsk (v32i8 VR256:$src)))]>,
VEX, VEX_L;
}
def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst), (ins VR128:$src),
"pmovmskb\t{$src, $dst|$dst, $src}",
- [(set GR32orGR64:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))],
+ [(set GR32orGR64:$dst, (X86movmsk (v16i8 VR128:$src)))],
IIC_SSE_MOVMSK>;
} // ExeDomain = SSEPackedInt
Modified: llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h?rev=265266&r1=265265&r2=265266&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h (original)
+++ llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h Sun Apr 3 13:22:03 2016
@@ -321,6 +321,8 @@ static const IntrinsicData IntrinsicsWi
X86_INTRINSIC_DATA(avx_max_ps_256, INTR_TYPE_2OP, X86ISD::FMAX, 0),
X86_INTRINSIC_DATA(avx_min_pd_256, INTR_TYPE_2OP, X86ISD::FMIN, 0),
X86_INTRINSIC_DATA(avx_min_ps_256, INTR_TYPE_2OP, X86ISD::FMIN, 0),
+ X86_INTRINSIC_DATA(avx_movmsk_pd_256, INTR_TYPE_1OP, X86ISD::MOVMSK, 0),
+ X86_INTRINSIC_DATA(avx_movmsk_ps_256, INTR_TYPE_1OP, X86ISD::MOVMSK, 0),
X86_INTRINSIC_DATA(avx_rcp_ps_256, INTR_TYPE_1OP, X86ISD::FRCP, 0),
X86_INTRINSIC_DATA(avx_rsqrt_ps_256, INTR_TYPE_1OP, X86ISD::FRSQRT, 0),
X86_INTRINSIC_DATA(avx_sqrt_pd_256, INTR_TYPE_1OP, ISD::FSQRT, 0),
@@ -354,6 +356,7 @@ static const IntrinsicData IntrinsicsWi
X86_INTRINSIC_DATA(avx2_pminu_b, INTR_TYPE_2OP, ISD::UMIN, 0),
X86_INTRINSIC_DATA(avx2_pminu_d, INTR_TYPE_2OP, ISD::UMIN, 0),
X86_INTRINSIC_DATA(avx2_pminu_w, INTR_TYPE_2OP, ISD::UMIN, 0),
+ X86_INTRINSIC_DATA(avx2_pmovmskb, INTR_TYPE_1OP, X86ISD::MOVMSK, 0),
X86_INTRINSIC_DATA(avx2_pmovsxbd, INTR_TYPE_1OP, X86ISD::VSEXT, 0),
X86_INTRINSIC_DATA(avx2_pmovsxbq, INTR_TYPE_1OP, X86ISD::VSEXT, 0),
X86_INTRINSIC_DATA(avx2_pmovsxbw, INTR_TYPE_1OP, X86ISD::VSEXT, 0),
@@ -2184,6 +2187,7 @@ static const IntrinsicData IntrinsicsWi
X86_INTRINSIC_DATA(sse_comineq_ss, COMI, X86ISD::COMI, ISD::SETNE),
X86_INTRINSIC_DATA(sse_max_ps, INTR_TYPE_2OP, X86ISD::FMAX, 0),
X86_INTRINSIC_DATA(sse_min_ps, INTR_TYPE_2OP, X86ISD::FMIN, 0),
+ X86_INTRINSIC_DATA(sse_movmsk_ps, INTR_TYPE_1OP, X86ISD::MOVMSK, 0),
X86_INTRINSIC_DATA(sse_rcp_ps, INTR_TYPE_1OP, X86ISD::FRCP, 0),
X86_INTRINSIC_DATA(sse_rsqrt_ps, INTR_TYPE_1OP, X86ISD::FRSQRT, 0),
X86_INTRINSIC_DATA(sse_sqrt_ps, INTR_TYPE_1OP, ISD::FSQRT, 0),
@@ -2201,6 +2205,7 @@ static const IntrinsicData IntrinsicsWi
X86_INTRINSIC_DATA(sse2_comineq_sd, COMI, X86ISD::COMI, ISD::SETNE),
X86_INTRINSIC_DATA(sse2_max_pd, INTR_TYPE_2OP, X86ISD::FMAX, 0),
X86_INTRINSIC_DATA(sse2_min_pd, INTR_TYPE_2OP, X86ISD::FMIN, 0),
+ X86_INTRINSIC_DATA(sse2_movmsk_pd, INTR_TYPE_1OP, X86ISD::MOVMSK, 0),
X86_INTRINSIC_DATA(sse2_packssdw_128, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(sse2_packsswb_128, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(sse2_packuswb_128, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
@@ -2210,6 +2215,7 @@ static const IntrinsicData IntrinsicsWi
X86_INTRINSIC_DATA(sse2_pmaxu_b, INTR_TYPE_2OP, ISD::UMAX, 0),
X86_INTRINSIC_DATA(sse2_pmins_w, INTR_TYPE_2OP, ISD::SMIN, 0),
X86_INTRINSIC_DATA(sse2_pminu_b, INTR_TYPE_2OP, ISD::UMIN, 0),
+ X86_INTRINSIC_DATA(sse2_pmovmskb_128, INTR_TYPE_1OP, X86ISD::MOVMSK, 0),
X86_INTRINSIC_DATA(sse2_pmulh_w, INTR_TYPE_2OP, ISD::MULHS, 0),
X86_INTRINSIC_DATA(sse2_pmulhu_w, INTR_TYPE_2OP, ISD::MULHU, 0),
X86_INTRINSIC_DATA(sse2_pmulu_dq, INTR_TYPE_2OP, X86ISD::PMULUDQ, 0),
More information about the llvm-commits mailing list