[llvm-commits] [llvm] r77407 - in /llvm/trunk: include/llvm/IntrinsicsX86.td lib/Target/X86/X86ISelLowering.cpp lib/Target/X86/X86ISelLowering.h lib/Target/X86/X86InstrSSE.td test/CodeGen/X86/sse41.ll
Evan Cheng
evan.cheng at apple.com
Tue Jul 28 18:57:19 PDT 2009
Is there a way to make use of ptest for the non-intrinsic form of a
comparison of two SSE values?
Evan
On Jul 28, 2009, at 5:28 PM, Eric Christopher wrote:
> Author: echristo
> Date: Tue Jul 28 19:28:05 2009
> New Revision: 77407
>
> URL: http://llvm.org/viewvc/llvm-project?rev=77407&view=rev
> Log:
> Add support for gcc __builtin_ia32_ptest{z,c,nzc} intrinsics. Lower
> to ptest instruction plus setcc. Revamp ptest instruction. Add test.
>
> Modified:
> llvm/trunk/include/llvm/IntrinsicsX86.td
> llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
> llvm/trunk/lib/Target/X86/X86ISelLowering.h
> llvm/trunk/lib/Target/X86/X86InstrSSE.td
> llvm/trunk/test/CodeGen/X86/sse41.ll
>
> Modified: llvm/trunk/include/llvm/IntrinsicsX86.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IntrinsicsX86.td?rev=77407&r1=77406&r2=77407&view=diff
>
> ==============================================================================
> --- llvm/trunk/include/llvm/IntrinsicsX86.td (original)
> +++ llvm/trunk/include/llvm/IntrinsicsX86.td Tue Jul 28 19:28:05 2009
> @@ -864,6 +864,18 @@
> Intrinsic<[llvm_v2i64_ty], [llvm_ptr_ty], [IntrReadMem]>;
> }
>
> +// Test instruction with bitwise comparison.
> +let TargetPrefix = "x86" in { // All intrinsics start with
> "llvm.x86.".
> + def int_x86_sse41_ptestz :
> GCCBuiltin<"__builtin_ia32_ptestz128">,
> + Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty],
> + [IntrNoMem]>;
> + def int_x86_sse41_ptestc :
> GCCBuiltin<"__builtin_ia32_ptestc128">,
> + Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty],
> + [IntrNoMem]>;
> + def int_x86_sse41_ptestnzc :
> GCCBuiltin<"__builtin_ia32_ptestnzc128">,
> + Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty],
> + [IntrNoMem]>;
> +}
>
> //===----------------------------------------------------------------------===//
> // MMX
>
> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=77407&r1=77406&r2=77407&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Jul 28
> 19:28:05 2009
> @@ -6200,6 +6200,36 @@
> DAG.getConstant(X86CC, MVT::i8),
> Cond);
> return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
> }
> + // ptest intrinsics. The intrinsic these come from are designed
> to return
> + // a boolean value, not just an instruction so lower it to the
> ptest
> + // pattern and a conditional move to the result.
> + case Intrinsic::x86_sse41_ptestz:
> + case Intrinsic::x86_sse41_ptestc:
> + case Intrinsic::x86_sse41_ptestnzc:{
> + unsigned X86CC = 0;
> + switch (IntNo) {
> + default: break;
> + case Intrinsic::x86_sse41_ptestz:
> + // ZF = 1
> + X86CC = X86::COND_E;
> + break;
> + case Intrinsic::x86_sse41_ptestc:
> + // CF = 1
> + X86CC = X86::COND_B;
> + break;
> + case Intrinsic::x86_sse41_ptestnzc:
> + // ZF and CF = 0
> + X86CC = X86::COND_A;
> + break;
> + }
> +
> + SDValue LHS = Op.getOperand(1);
> + SDValue RHS = Op.getOperand(2);
> + SDValue Test = DAG.getNode(X86ISD::PTEST, dl, MVT::i32, LHS,
> RHS);
> + SDValue CC = DAG.getConstant(X86CC, MVT::i8);
> + SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, CC,
> Test);
> + return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
> + }
>
> // Fix vector shift instructions where the last operand is a non-
> immediate
> // i32 value.
> @@ -7048,6 +7078,7 @@
> case X86ISD::INC: return "X86ISD::INC";
> case X86ISD::DEC: return "X86ISD::DEC";
> case X86ISD::MUL_IMM: return "X86ISD::MUL_IMM";
> + case X86ISD::PTEST: return "X86ISD::PTEST";
> }
> }
>
>
> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=77407&r1=77406&r2=77407&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original)
> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Tue Jul 28 19:28:05
> 2009
> @@ -244,7 +244,10 @@
> INC, DEC,
>
> // MUL_IMM - X86 specific multiply by immediate.
> - MUL_IMM
> + MUL_IMM,
> +
> + // PTEST - Vector bitwise comparisons
> + PTEST
> };
> }
>
>
> Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=77407&r1=77406&r2=77407&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
> +++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Tue Jul 28 19:28:05 2009
> @@ -69,6 +69,9 @@
> def X86pcmpgtd : SDNode<"X86ISD::PCMPGTD", SDTIntBinOp>;
> def X86pcmpgtq : SDNode<"X86ISD::PCMPGTQ", SDTIntBinOp>;
>
> +def SDTX86CmpPTest : SDTypeProfile<0, 2, [SDTCisVT<0, v4f32>,
> SDTCisVT<1, v4f32>]>;
> +def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>;
> +
> //===----------------------------------------------------------------------===//
> // SSE Complex Patterns
> //===----------------------------------------------------------------------===//
> @@ -3618,11 +3621,17 @@
> def : Pat<(int_x86_sse41_insertps VR128:$src1, VR128:$src2, imm:
> $src3),
> (INSERTPSrr VR128:$src1, VR128:$src2, imm:$src3)>;
>
> +// ptest instruction we'll lower to this in X86ISelLowering
> primarily from
> +// the intel intrinsic that corresponds to this.
> let Defs = [EFLAGS] in {
> def PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1,
> VR128:$src2),
> - "ptest \t{$src2, $src1|$src1, $src2}", []>,
> OpSize;
> + "ptest \t{$src2, $src1|$src1, $src2}",
> + [(X86ptest VR128:$src1, VR128:$src2),
> + (implicit EFLAGS)]>, OpSize;
> def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1,
> i128mem:$src2),
> - "ptest \t{$src2, $src1|$src1, $src2}", []>,
> OpSize;
> + "ptest \t{$src2, $src1|$src1, $src2}",
> + [(X86ptest VR128:$src1, (load addr:$src2)),
> + (implicit EFLAGS)]>, OpSize;
> }
>
> def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins
> i128mem:$src),
>
> Modified: llvm/trunk/test/CodeGen/X86/sse41.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse41.ll?rev=77407&r1=77406&r2=77407&view=diff
>
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/sse41.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/sse41.ll Tue Jul 28 19:28:05 2009
> @@ -181,4 +181,19 @@
>
> ; X64: _insertps_3:
> ; X64: insertps $0, %xmm1, %xmm0
> -}
> \ No newline at end of file
> +}
> +
> +define i32 @ptestz_1(<4 x float> %t1, <4 x float> %t2) nounwind {
> + %tmp1 = call i32 @llvm.x86.sse41.ptestz(<4 x float> %t1, <4
> x float> %t2) nounwind readnone
> + ret i32 %tmp1
> +; X32: _ptestz_1:
> +; X32: ptest %xmm1, %xmm0
> +; X32: sete %al
> +
> +; X64: _ptestz_1:
> +; X64: ptest %xmm1, %xmm0
> +; X64: sete %al
> +}
> +
> +declare i32 @llvm.x86.sse41.ptestz(<4 x float>, <4 x float>)
> nounwind readnone
> +
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
More information about the llvm-commits mailing list