[llvm] r208210 - AArch64/ARM64: optimise vector selects & enable test

Jim Grosbach grosbach at apple.com
Wed Aug 27 12:29:51 PDT 2014


Hey Tim,

llvm-stress tracked down a crashing input for this optimization. I filed http://llvm.org/bugs/show_bug.cgi?id=20775 with a reduced testcase. The original input was produced by "llvm-stress --seed=18027 | llc -mtriple=arm64-apple-ios".

-Jim

> On May 7, 2014, at 7:10 AM, Tim Northover <tnorthover at apple.com> wrote:
> 
> Author: tnorthover
> Date: Wed May  7 09:10:27 2014
> New Revision: 208210
> 
> URL: http://llvm.org/viewvc/llvm-project?rev=208210&view=rev
> Log:
> AArch64/ARM64: optimise vector selects & enable test
> 
> When performing a scalar comparison that feeds into a vector select,
> it's actually better to do the comparison on the vector side: the
> scalar route would be "CMP -> CSEL -> DUP", the vector is "CM -> DUP"
> since the vector comparisons are all mask based.
> 
> Added:
>    llvm/trunk/test/CodeGen/ARM64/aarch64-neon-select_cc.ll
>      - copied, changed from r208209, llvm/trunk/test/CodeGen/AArch64/neon-select_cc.ll
> Modified:
>    llvm/trunk/lib/Target/ARM64/ARM64ISelLowering.cpp
>    llvm/trunk/test/CodeGen/AArch64/neon-select_cc.ll
> 
> Modified: llvm/trunk/lib/Target/ARM64/ARM64ISelLowering.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM64/ARM64ISelLowering.cpp?rev=208210&r1=208209&r2=208210&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/ARM64/ARM64ISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/ARM64/ARM64ISelLowering.cpp Wed May  7 09:10:27 2014
> @@ -366,6 +366,7 @@ ARM64TargetLowering::ARM64TargetLowering
> 
>   setTargetDAGCombine(ISD::MUL);
> 
> +  setTargetDAGCombine(ISD::SELECT);
>   setTargetDAGCombine(ISD::VSELECT);
> 
>   MaxStoresPerMemset = MaxStoresPerMemsetOptSize = 8;
> @@ -7121,6 +7122,44 @@ static SDValue performVSelectCombine(SDN
>                      IfTrue, IfFalse);
> }
> 
> +/// A vector select: "(select vL, vR, (setcc LHS, RHS))" is best performed with
> +/// the compare-mask instructions rather than going via NZCV, even if LHS and
> +/// RHS are really scalar. This replaces any scalar setcc in the above pattern
> +/// with a vector one followed by a DUP shuffle on the result.
> +static SDValue performSelectCombine(SDNode *N, SelectionDAG &DAG) {
> +  SDValue N0 = N->getOperand(0);
> +  EVT ResVT = N->getValueType(0);
> +
> +  if (!N->getOperand(1).getValueType().isVector())
> +    return SDValue();
> +
> +  if (N0.getOpcode() != ISD::SETCC || N0.getValueType() != MVT::i1)
> +    return SDValue();
> +
> +  SDLoc DL(N0);
> +
> +  EVT SrcVT = N0.getOperand(0).getValueType();
> +  SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT,
> +                           ResVT.getSizeInBits() / SrcVT.getSizeInBits());
> +  EVT CCVT = SrcVT.changeVectorElementTypeToInteger();
> +
> +  // First perform a vector comparison, where lane 0 is the one we're interested
> +  // in.
> +  SDValue LHS =
> +      DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, SrcVT, N0.getOperand(0));
> +  SDValue RHS =
> +      DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, SrcVT, N0.getOperand(1));
> +  SDValue SetCC = DAG.getNode(ISD::SETCC, DL, CCVT, LHS, RHS, N0.getOperand(2));
> +
> +  // Now duplicate the comparison mask we want across all other lanes.
> +  SmallVector<int, 8> DUPMask(CCVT.getVectorNumElements(), 0);
> +  SDValue Mask = DAG.getVectorShuffle(CCVT, DL, SetCC, SetCC, DUPMask.data());
> +  Mask = DAG.getNode(ISD::BITCAST, DL, ResVT.changeVectorElementTypeToInteger(),
> +                     Mask);
> +
> +  return DAG.getSelect(DL, ResVT, Mask, N->getOperand(1), N->getOperand(2));
> +}
> +
> SDValue ARM64TargetLowering::PerformDAGCombine(SDNode *N,
>                                                DAGCombinerInfo &DCI) const {
>   SelectionDAG &DAG = DCI.DAG;
> @@ -7149,6 +7188,8 @@ SDValue ARM64TargetLowering::PerformDAGC
>     return performBitcastCombine(N, DCI, DAG);
>   case ISD::CONCAT_VECTORS:
>     return performConcatVectorsCombine(N, DCI, DAG);
> +  case ISD::SELECT:
> +    return performSelectCombine(N, DAG);
>   case ISD::VSELECT:
>     return performVSelectCombine(N, DCI.DAG);
>   case ISD::STORE:
> 
> Modified: llvm/trunk/test/CodeGen/AArch64/neon-select_cc.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-select_cc.ll?rev=208210&r1=208209&r2=208210&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/AArch64/neon-select_cc.ll (original)
> +++ llvm/trunk/test/CodeGen/AArch64/neon-select_cc.ll Wed May  7 09:10:27 2014
> @@ -1,5 +1,5 @@
> ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
> -
> +; arm64 has separate copy of this test due to different codegen.
> define <8x i8> @test_select_cc_v8i8_i8(i8 %a, i8 %b, <8x i8> %c, <8x i8> %d ) {
> ; CHECK-LABEL: test_select_cc_v8i8_i8:
> ; CHECK: and	w0, w0, #0xff
> 
> Copied: llvm/trunk/test/CodeGen/ARM64/aarch64-neon-select_cc.ll (from r208209, llvm/trunk/test/CodeGen/AArch64/neon-select_cc.ll)
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM64/aarch64-neon-select_cc.ll?p2=llvm/trunk/test/CodeGen/ARM64/aarch64-neon-select_cc.ll&p1=llvm/trunk/test/CodeGen/AArch64/neon-select_cc.ll&r1=208209&r2=208210&rev=208210&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/AArch64/neon-select_cc.ll (original)
> +++ llvm/trunk/test/CodeGen/ARM64/aarch64-neon-select_cc.ll Wed May  7 09:10:27 2014
> @@ -1,12 +1,12 @@
> -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
> +; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
> 
> define <8x i8> @test_select_cc_v8i8_i8(i8 %a, i8 %b, <8x i8> %c, <8x i8> %d ) {
> ; CHECK-LABEL: test_select_cc_v8i8_i8:
> -; CHECK: and	w0, w0, #0xff
> -; CHECK-NEXT: cmp	w0, w1, uxtb
> -; CHECK-NEXT: csinv	w0, wzr, wzr, ne
> -; CHECK-NEXT: dup	v{{[0-9]+}}.8b, w0
> -; CHECK-NEXT:	bsl	v{{[0-9]+}}.8b, v0.8b, v1.8b
> +; CHECK-DAG: fmov s[[LHS:[0-9]+]], w0
> +; CHECK-DAG: fmov s[[RHS:[0-9]+]], w1
> +; CHECK: cmeq [[MASK:v[0-9]+]].8b, v[[LHS]].8b, v[[RHS]].8b
> +; CHECK: dup [[DUPMASK:v[0-9]+]].8b, [[MASK]].b[0]
> +; CHECK: bsl [[DUPMASK]].8b, v0.8b, v1.8b
>   %cmp31 = icmp eq i8 %a, %b
>   %e = select i1 %cmp31, <8x i8> %c, <8x i8> %d
>   ret <8x i8> %e
> @@ -14,9 +14,9 @@ define <8x i8> @test_select_cc_v8i8_i8(i
> 
> define <8x i8> @test_select_cc_v8i8_f32(float %a, float %b, <8x i8> %c, <8x i8> %d ) {
> ; CHECK-LABEL: test_select_cc_v8i8_f32:
> -; CHECK: fcmeq	v{{[0-9]+}}.4s, v0.4s, v1.4s
> -; CHECK-NEXT: dup	v{{[0-9]+}}.2s, v{{[0-9]+}}.s[0]
> -; CHECK-NEXT:	bsl	v{{[0-9]+}}.8b, v2.8b, v3.8b
> +; CHECK: fcmeq [[MASK:v[0-9]+]].2s, v0.2s, v1.2s
> +; CHECK-NEXT: dup [[DUPMASK:v[0-9]+]].2s, [[MASK]].s[0]
> +; CHECK-NEXT: bsl [[DUPMASK]].8b, v2.8b, v3.8b
>   %cmp31 = fcmp oeq float %a, %b
>   %e = select i1 %cmp31, <8x i8> %c, <8x i8> %d
>   ret <8x i8> %e
> @@ -24,8 +24,8 @@ define <8x i8> @test_select_cc_v8i8_f32(
> 
> define <8x i8> @test_select_cc_v8i8_f64(double %a, double %b, <8x i8> %c, <8x i8> %d ) {
> ; CHECK-LABEL: test_select_cc_v8i8_f64:
> -; CHECK: fcmeq	v{{[0-9]+}}.2d, v0.2d, v1.2d
> -; CHECK-NEXT:	bsl	v{{[0-9]+}}.8b, v2.8b, v3.8b
> +; CHECK: fcmeq d[[MASK:[0-9]+]], d0, d1
> +; CHECK-NEXT: bsl v[[MASK]].8b, v2.8b, v3.8b
>   %cmp31 = fcmp oeq double %a, %b
>   %e = select i1 %cmp31, <8x i8> %c, <8x i8> %d
>   ret <8x i8> %e
> @@ -33,11 +33,11 @@ define <8x i8> @test_select_cc_v8i8_f64(
> 
> define <16x i8> @test_select_cc_v16i8_i8(i8 %a, i8 %b, <16x i8> %c, <16x i8> %d ) {
> ; CHECK-LABEL: test_select_cc_v16i8_i8:
> -; CHECK: and	w0, w0, #0xff
> -; CHECK-NEXT: cmp	w0, w1, uxtb
> -; CHECK-NEXT: csinv	w0, wzr, wzr, ne
> -; CHECK-NEXT: dup	v{{[0-9]+}}.16b, w0
> -; CHECK-NEXT:	bsl	v{{[0-9]+}}.16b, v0.16b, v1.16b
> +; CHECK-DAG: fmov s[[LHS:[0-9]+]], w0
> +; CHECK-DAG: fmov s[[RHS:[0-9]+]], w1
> +; CHECK: cmeq [[MASK:v[0-9]+]].16b, v[[LHS]].16b, v[[RHS]].16b
> +; CHECK: dup [[DUPMASK:v[0-9]+]].16b, [[MASK]].b[0]
> +; CHECK: bsl [[DUPMASK]].16b, v0.16b, v1.16b
>   %cmp31 = icmp eq i8 %a, %b
>   %e = select i1 %cmp31, <16x i8> %c, <16x i8> %d
>   ret <16x i8> %e
> @@ -45,9 +45,9 @@ define <16x i8> @test_select_cc_v16i8_i8
> 
> define <16x i8> @test_select_cc_v16i8_f32(float %a, float %b, <16x i8> %c, <16x i8> %d ) {
> ; CHECK-LABEL: test_select_cc_v16i8_f32:
> -; CHECK: fcmeq	v{{[0-9]+}}.4s, v0.4s, v1.4s
> -; CHECK-NEXT: dup	v{{[0-9]+}}.4s, v{{[0-9]+}}.s[0]
> -; CHECK-NEXT:	bsl	v{{[0-9]+}}.16b, v2.16b, v3.16b
> +; CHECK: fcmeq [[MASK:v[0-9]+]].4s, v0.4s, v1.4s
> +; CHECK-NEXT: dup [[DUPMASK:v[0-9]+]].4s, [[MASK]].s[0]
> +; CHECK-NEXT: bsl [[DUPMASK]].16b, v2.16b, v3.16b
>   %cmp31 = fcmp oeq float %a, %b
>   %e = select i1 %cmp31, <16x i8> %c, <16x i8> %d
>   ret <16x i8> %e
> @@ -55,9 +55,9 @@ define <16x i8> @test_select_cc_v16i8_f3
> 
> define <16x i8> @test_select_cc_v16i8_f64(double %a, double %b, <16x i8> %c, <16x i8> %d ) {
> ; CHECK-LABEL: test_select_cc_v16i8_f64:
> -; CHECK: fcmeq	v{{[0-9]+}}.2d, v0.2d, v1.2d
> -; CHECK-NEXT: dup	v{{[0-9]+}}.2d, v{{[0-9]+}}.d[0]
> -; CHECK-NEXT:	bsl	v{{[0-9]+}}.16b, v2.16b, v3.16b
> +; CHECK: fcmeq [[MASK:v[0-9]+]].2d, v0.2d, v1.2d
> +; CHECK-NEXT: dup [[DUPMASK:v[0-9]+]].2d, [[MASK]].d[0]
> +; CHECK-NEXT: bsl [[DUPMASK]].16b, v2.16b, v3.16b
>   %cmp31 = fcmp oeq double %a, %b
>   %e = select i1 %cmp31, <16x i8> %c, <16x i8> %d
>   ret <16x i8> %e
> @@ -65,11 +65,11 @@ define <16x i8> @test_select_cc_v16i8_f6
> 
> define <4x i16> @test_select_cc_v4i16(i16 %a, i16 %b, <4x i16> %c, <4x i16> %d ) {
> ; CHECK-LABEL: test_select_cc_v4i16:
> -; CHECK: and	w0, w0, #0xffff
> -; CHECK-NEXT: cmp	w0, w1, uxth
> -; CHECK-NEXT: csinv	w0, wzr, wzr, ne
> -; CHECK-NEXT: dup	v{{[0-9]+}}.4h, w0
> -; CHECK-NEXT:	bsl	v{{[0-9]+}}.8b, v0.8b, v1.8b
> +; CHECK-DAG: fmov s[[LHS:[0-9]+]], w0
> +; CHECK-DAG: fmov s[[RHS:[0-9]+]], w1
> +; CHECK: cmeq [[MASK:v[0-9]+]].4h, v[[LHS]].4h, v[[RHS]].4h
> +; CHECK: dup [[DUPMASK:v[0-9]+]].4h, [[MASK]].h[0]
> +; CHECK: bsl [[DUPMASK]].8b, v0.8b, v1.8b
>   %cmp31 = icmp eq i16 %a, %b
>   %e = select i1 %cmp31, <4x i16> %c, <4x i16> %d
>   ret <4x i16> %e
> @@ -77,11 +77,11 @@ define <4x i16> @test_select_cc_v4i16(i1
> 
> define <8x i16> @test_select_cc_v8i16(i16 %a, i16 %b, <8x i16> %c, <8x i16> %d ) {
> ; CHECK-LABEL: test_select_cc_v8i16:
> -; CHECK: and	w0, w0, #0xffff
> -; CHECK-NEXT: cmp	w0, w1, uxth
> -; CHECK-NEXT: csinv	w0, wzr, wzr, ne
> -; CHECK-NEXT: dup	v{{[0-9]+}}.8h, w0
> -; CHECK-NEXT:	bsl	v{{[0-9]+}}.16b, v0.16b, v1.16b
> +; CHECK-DAG: fmov s[[LHS:[0-9]+]], w0
> +; CHECK-DAG: fmov s[[RHS:[0-9]+]], w1
> +; CHECK: cmeq [[MASK:v[0-9]+]].8h, v[[LHS]].8h, v[[RHS]].8h
> +; CHECK: dup [[DUPMASK:v[0-9]+]].8h, [[MASK]].h[0]
> +; CHECK: bsl [[DUPMASK]].16b, v0.16b, v1.16b
>   %cmp31 = icmp eq i16 %a, %b
>   %e = select i1 %cmp31, <8x i16> %c, <8x i16> %d
>   ret <8x i16> %e
> @@ -89,10 +89,11 @@ define <8x i16> @test_select_cc_v8i16(i1
> 
> define <2x i32> @test_select_cc_v2i32(i32 %a, i32 %b, <2x i32> %c, <2x i32> %d ) {
> ; CHECK-LABEL: test_select_cc_v2i32:
> -; CHECK: cmp	w0, w1, uxtw
> -; CHECK-NEXT: csinv	w0, wzr, wzr, ne
> -; CHECK-NEXT: dup	v{{[0-9]+}}.2s, w0
> -; CHECK-NEXT:	bsl	v{{[0-9]+}}.8b, v0.8b, v1.8b
> +; CHECK-DAG: fmov s[[LHS:[0-9]+]], w0
> +; CHECK-DAG: fmov s[[RHS:[0-9]+]], w1
> +; CHECK: cmeq [[MASK:v[0-9]+]].2s, v[[LHS]].2s, v[[RHS]].2s
> +; CHECK: dup [[DUPMASK:v[0-9]+]].2s, [[MASK]].s[0]
> +; CHECK: bsl [[DUPMASK]].8b, v0.8b, v1.8b
>   %cmp31 = icmp eq i32 %a, %b
>   %e = select i1 %cmp31, <2x i32> %c, <2x i32> %d
>   ret <2x i32> %e
> @@ -100,10 +101,11 @@ define <2x i32> @test_select_cc_v2i32(i3
> 
> define <4x i32> @test_select_cc_v4i32(i32 %a, i32 %b, <4x i32> %c, <4x i32> %d ) {
> ; CHECK-LABEL: test_select_cc_v4i32:
> -; CHECK: cmp	w0, w1, uxtw
> -; CHECK-NEXT: csinv	w0, wzr, wzr, ne
> -; CHECK-NEXT: dup	v{{[0-9]+}}.4s, w0
> -; CHECK-NEXT:	bsl	v{{[0-9]+}}.16b, v0.16b, v1.16b
> +; CHECK-DAG: fmov s[[LHS:[0-9]+]], w0
> +; CHECK-DAG: fmov s[[RHS:[0-9]+]], w1
> +; CHECK: cmeq [[MASK:v[0-9]+]].4s, v[[LHS]].4s, v[[RHS]].4s
> +; CHECK: dup [[DUPMASK:v[0-9]+]].4s, [[MASK]].s[0]
> +; CHECK: bsl [[DUPMASK]].16b, v0.16b, v1.16b
>   %cmp31 = icmp eq i32 %a, %b
>   %e = select i1 %cmp31, <4x i32> %c, <4x i32> %d
>   ret <4x i32> %e
> @@ -111,10 +113,10 @@ define <4x i32> @test_select_cc_v4i32(i3
> 
> define <1x i64> @test_select_cc_v1i64(i64 %a, i64 %b, <1x i64> %c, <1x i64> %d ) {
> ; CHECK-LABEL: test_select_cc_v1i64:
> -; CHECK: cmp	x0, x1
> -; CHECK-NEXT: csinv	x0, xzr, xzr, ne
> -; CHECK-NEXT: fmov	d{{[0-9]+}}, x0
> -; CHECK-NEXT:	bsl	v{{[0-9]+}}.8b, v0.8b, v1.8b
> +; CHECK-DAG: fmov d[[LHS:[0-9]+]], x0
> +; CHECK-DAG: fmov d[[RHS:[0-9]+]], x1
> +; CHECK: cmeq d[[MASK:[0-9]+]], d[[LHS]], d[[RHS]]
> +; CHECK: bsl v[[MASK]].8b, v0.8b, v1.8b
>   %cmp31 = icmp eq i64 %a, %b
>   %e = select i1 %cmp31, <1x i64> %c, <1x i64> %d
>   ret <1x i64> %e
> @@ -122,10 +124,11 @@ define <1x i64> @test_select_cc_v1i64(i6
> 
> define <2x i64> @test_select_cc_v2i64(i64 %a, i64 %b, <2x i64> %c, <2x i64> %d ) {
> ; CHECK-LABEL: test_select_cc_v2i64:
> -; CHECK: cmp	x0, x1
> -; CHECK-NEXT: csinv	x0, xzr, xzr, ne
> -; CHECK-NEXT: dup	v{{[0-9]+}}.2d, x0
> -; CHECK-NEXT:	bsl	v{{[0-9]+}}.16b, v0.16b, v1.16b
> +; CHECK-DAG: fmov d[[LHS:[0-9]+]], x0
> +; CHECK-DAG: fmov d[[RHS:[0-9]+]], x1
> +; CHECK: cmeq [[MASK:v[0-9]+]].2d, v[[LHS]].2d, v[[RHS]].2d
> +; CHECK: dup [[DUPMASK:v[0-9]+]].2d, [[MASK]].d[0]
> +; CHECK: bsl [[DUPMASK]].16b, v0.16b, v1.16b
>   %cmp31 = icmp eq i64 %a, %b
>   %e = select i1 %cmp31, <2x i64> %c, <2x i64> %d
>   ret <2x i64> %e
> @@ -133,18 +136,18 @@ define <2x i64> @test_select_cc_v2i64(i6
> 
> define <1 x float> @test_select_cc_v1f32(float %a, float %b, <1 x float> %c, <1 x float> %d ) {
> ; CHECK-LABEL: test_select_cc_v1f32:
> -; CHECK: fcmp	s0, s1
> -; CHECK-NEXT: fcsel	s0, s2, s3, eq
> +; CHECK: fcmp s0, s1
> +; CHECK-NEXT: fcsel s0, s2, s3, eq
>   %cmp31 = fcmp oeq float %a, %b
>   %e = select i1 %cmp31, <1 x float> %c, <1 x float> %d
>   ret <1 x float> %e
> }
> -  
> +
> define <2 x float> @test_select_cc_v2f32(float %a, float %b, <2 x float> %c, <2 x float> %d ) {
> ; CHECK-LABEL: test_select_cc_v2f32:
> -; CHECK: fcmeq	v{{[0-9]+}}.4s, v0.4s, v1.4s
> -; CHECK-NEXT: dup	v{{[0-9]+}}.2s, v{{[0-9]+}}.s[0]
> -; CHECK-NEXT:	bsl	v{{[0-9]+}}.8b, v2.8b, v3.8b
> +; CHECK: fcmeq [[MASK:v[0-9]+]].2s, v0.2s, v1.2s
> +; CHECK: dup [[DUPMASK:v[0-9]+]].2s, [[MASK]].s[0]
> +; CHECK: bsl [[DUPMASK]].8b, v2.8b, v3.8b
>   %cmp31 = fcmp oeq float %a, %b
>   %e = select i1 %cmp31, <2 x float> %c, <2 x float> %d
>   ret <2 x float> %e
> @@ -152,9 +155,9 @@ define <2 x float> @test_select_cc_v2f32
> 
> define <4x float> @test_select_cc_v4f32(float %a, float %b, <4x float> %c, <4x float> %d ) {
> ; CHECK-LABEL: test_select_cc_v4f32:
> -; CHECK: fcmeq	v{{[0-9]+}}.4s, v0.4s, v1.4s
> -; CHECK-NEXT: dup	v{{[0-9]+}}.4s, v{{[0-9]+}}.s[0]
> -; CHECK-NEXT:	bsl	v{{[0-9]+}}.16b, v2.16b, v3.16b
> +; CHECK: fcmeq [[MASK:v[0-9]+]].4s, v0.4s, v1.4s
> +; CHECK: dup [[DUPMASK:v[0-9]+]].4s, [[MASK]].s[0]
> +; CHECK: bsl [[DUPMASK]].16b, v2.16b, v3.16b
>   %cmp31 = fcmp oeq float %a, %b
>   %e = select i1 %cmp31, <4x float> %c, <4x float> %d
>   ret <4x float> %e
> @@ -162,10 +165,11 @@ define <4x float> @test_select_cc_v4f32(
> 
> define <4x float> @test_select_cc_v4f32_icmp(i32 %a, i32 %b, <4x float> %c, <4x float> %d ) {
> ; CHECK-LABEL: test_select_cc_v4f32_icmp:
> -; CHECK: cmp	w0, w1, uxtw
> -; CHECK: csinv	w0, wzr, wzr, ne
> -; CHECK-NEXT: dup	v{{[0-9]+}}.4s, w0
> -; CHECK-NEXT:	bsl	v{{[0-9]+}}.16b, v0.16b, v1.16b
> +; CHECK-DAG: fmov s[[LHS:[0-9]+]], w0
> +; CHECK-DAG: fmov s[[RHS:[0-9]+]], w1
> +; CHECK: cmeq [[MASK:v[0-9]+]].4s, v[[LHS]].4s, v[[RHS]].4s
> +; CHECK: dup [[DUPMASK:v[0-9]+]].4s, [[MASK]].s[0]
> +; CHECK: bsl [[DUPMASK]].16b, v0.16b, v1.16b
>   %cmp31 = icmp eq i32 %a, %b
>   %e = select i1 %cmp31, <4x float> %c, <4x float> %d
>   ret <4x float> %e
> @@ -173,8 +177,8 @@ define <4x float> @test_select_cc_v4f32_
> 
> define <1 x double> @test_select_cc_v1f64(double %a, double %b, <1 x double> %c, <1 x double> %d ) {
> ; CHECK-LABEL: test_select_cc_v1f64:
> -; CHECK: fcmeq	v{{[0-9]+}}.2d, v0.2d, v1.2d
> -; CHECK-NEXT:	bsl	v{{[0-9]+}}.8b, v2.8b, v3.8b
> +; CHECK: fcmeq d[[MASK:[0-9]+]], d0, d1
> +; CHECK: bsl v[[MASK]].8b, v2.8b, v3.8b
>   %cmp31 = fcmp oeq double %a, %b
>   %e = select i1 %cmp31, <1 x double> %c, <1 x double> %d
>   ret <1 x double> %e
> @@ -182,10 +186,10 @@ define <1 x double> @test_select_cc_v1f6
> 
> define <1 x double> @test_select_cc_v1f64_icmp(i64 %a, i64 %b, <1 x double> %c, <1 x double> %d ) {
> ; CHECK-LABEL: test_select_cc_v1f64_icmp:
> -; CHECK: cmp	 x0, x1
> -; CHECK-NEXT: csinv	x0, xzr, xzr, ne
> -; CHECK-NEXT: fmov	d{{[0-9]+}}, x0
> -; CHECK-NEXT:	bsl	v{{[0-9]+}}.8b, v0.8b, v1.8b
> +; CHECK-DAG: fmov [[LHS:d[0-9]+]], x0
> +; CHECK-DAG: fmov [[RHS:d[0-9]+]], x1
> +; CHECK: cmeq d[[MASK:[0-9]+]], [[LHS]], [[RHS]]
> +; CHECK: bsl v[[MASK]].8b, v0.8b, v1.8b
>   %cmp31 = icmp eq i64 %a, %b
>   %e = select i1 %cmp31, <1 x double> %c, <1 x double> %d
>   ret <1 x double> %e
> @@ -193,9 +197,9 @@ define <1 x double> @test_select_cc_v1f6
> 
> define <2 x double> @test_select_cc_v2f64(double %a, double %b, <2 x double> %c, <2 x double> %d ) {
> ; CHECK-LABEL: test_select_cc_v2f64:
> -; CHECK: fcmeq	v{{[0-9]+}}.2d, v0.2d, v1.2d
> -; CHECK-NEXT: dup	v{{[0-9]+}}.2d, v{{[0-9]+}}.d[0]
> -; CHECK-NEXT:	bsl	v{{[0-9]+}}.16b, v2.16b, v3.16b
> +; CHECK: fcmeq [[MASK:v[0-9]+]].2d, v0.2d, v1.2d
> +; CHECK: dup [[DUPMASK:v[0-9]+]].2d, [[MASK]].d[0]
> +; CHECK: bsl [[DUPMASK]].16b, v2.16b, v3.16b
>   %cmp31 = fcmp oeq double %a, %b
>   %e = select i1 %cmp31, <2 x double> %c, <2 x double> %d
>   ret <2 x double> %e
> 
> 
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20140827/a4303546/attachment.html>


More information about the llvm-commits mailing list