[llvm] r200234 - [DAGCombiner] Teach how to fold sext/aext/zext of constant build vectors.
Daniel Sanders
Daniel.Sanders at imgtec.com
Wed Jan 29 02:14:22 PST 2014
Just to confirm, the MSA test change LGTM. Thanks
> -----Original Message-----
> From: llvm-commits-bounces at cs.uiuc.edu [mailto:llvm-commits-
> bounces at cs.uiuc.edu] On Behalf Of Andrea Di Biagio
> Sent: 27 January 2014 18:46
> To: llvm-commits at cs.uiuc.edu
> Subject: [llvm] r200234 - [DAGCombiner] Teach how to fold sext/aext/zext of
> constant build vectors.
>
> ....
> Author: adibiagio
> Date: Mon Jan 27 12:45:30 2014
> New Revision: 200234
>
> URL: http://llvm.org/viewvc/llvm-project?rev=200234&view=rev
> Log:
> [DAGCombiner] Teach how to fold sext/aext/zext of constant build vectors.
>
> This patch teaches the DAGCombiner how to fold a sext/aext/zext dag node
> when the operand in input is a build vector of constants (or UNDEFs).
>
> The inability to fold a sext/zext of a constant build_vector was the root cause
> of some pcg bugs affecting vselect expansion on x86-64 with AVX support.
>
> Before this change, the DAGCombiner only knew how to fold a
> sext/zext/aext of a ConstantSDNode.
>
>
> Added:
> llvm/trunk/test/CodeGen/X86/fold-vector-sext-zext.ll
> Modified:
> llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
> llvm/trunk/test/CodeGen/Mips/msa/compare_float.ll
> llvm/trunk/test/CodeGen/X86/avx-blend.ll
>
> Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
> URL: http://llvm.org/viewvc/llvm-
> project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=200
> 234&r1=200233&r2=200234&view=diff
> ==========================================================
> ====================
> --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
> +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Mon Jan 27
> +++ 12:45:30 2014
> @@ -4577,6 +4577,62 @@ SDValue DAGCombiner::visitSETCC(SDNode *
> SDLoc(N));
> }
>
> +// tryToFoldExtendOfConstant - Try to fold a sext/zext/aext dag node into a
> +// ConstantSDNode or a build_vector of constants.
> +// This function is called by the DAGCombiner when visiting sext/zext/aext
> +// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
> +// Vector extends are not folded if operations are legal; this is to
> +// avoid introducing illegal build_vector dag nodes.
> +static SDNode *tryToFoldExtendOfConstant(SDNode *N, SelectionDAG
> &DAG,
> + bool LegalOperations) {
> + unsigned Opcode = N->getOpcode();
> + SDValue N0 = N->getOperand(0);
> + EVT VT = N->getValueType(0);
> +
> +  assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
> +          Opcode == ISD::ANY_EXTEND) && "Expected EXTEND dag node in input!");
> +
> + // fold (sext c1) -> c1
> + // fold (zext c1) -> c1
> + // fold (aext c1) -> c1
> + if (isa<ConstantSDNode>(N0))
> + return DAG.getNode(Opcode, SDLoc(N), VT, N0).getNode();
> +
> +  // fold (sext (build_vector AllConstants) -> (build_vector AllConstants)
> +  // fold (zext (build_vector AllConstants) -> (build_vector AllConstants)
> +  // fold (aext (build_vector AllConstants) -> (build_vector AllConstants)
> +  if (!(VT.isVector() && !LegalOperations &&
> +        ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
> +    return 0;
> +
> + // We can fold this node into a build_vector.
> + unsigned VTBits = VT.getScalarType().getSizeInBits();
> + unsigned EVTBits =
> + N0->getValueType(0).getScalarType().getSizeInBits();
> + unsigned ShAmt = VTBits - EVTBits;
> + SmallVector<SDValue, 8> Elts;
> +  unsigned NumElts = N0->getNumOperands();
> +  SDLoc DL(N);
> +
> + for (unsigned i=0; i != NumElts; ++i) {
> + SDValue Op = N0->getOperand(i);
> + if (Op->getOpcode() == ISD::UNDEF) {
> + Elts.push_back(DAG.getUNDEF(VT.getScalarType()));
> + continue;
> + }
> +
> +    ConstantSDNode *CurrentND = cast<ConstantSDNode>(Op);
> +    const APInt &C = APInt(VTBits, CurrentND->getAPIntValue().getZExtValue());
> +    if (Opcode == ISD::SIGN_EXTEND)
> +      Elts.push_back(DAG.getConstant(C.shl(ShAmt).ashr(ShAmt).getZExtValue(),
> +                                     VT.getScalarType()));
> +    else
> +      Elts.push_back(DAG.getConstant(C.shl(ShAmt).lshr(ShAmt).getZExtValue(),
> +                                     VT.getScalarType()));
> +  }
> +
> +  return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, &Elts[0], NumElts).getNode();
> +}
> +
> // ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable
> this:
> // "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
> // transformation. Returns true if extension are possible and the above @@
> -4667,9 +4723,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SD
> SDValue N0 = N->getOperand(0);
> EVT VT = N->getValueType(0);
>
> - // fold (sext c1) -> c1
> - if (isa<ConstantSDNode>(N0))
> - return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N0);
> + if (SDNode *Res = tryToFoldExtendOfConstant(N, DAG, LegalOperations))
> + return SDValue(Res, 0);
>
> // fold (sext (sext x)) -> (sext x)
> // fold (sext (aext x)) -> (sext x)
> @@ -4917,9 +4972,9 @@ SDValue DAGCombiner::visitZERO_EXTEND(SD
> SDValue N0 = N->getOperand(0);
> EVT VT = N->getValueType(0);
>
> - // fold (zext c1) -> c1
> - if (isa<ConstantSDNode>(N0))
> - return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0);
> + if (SDNode *Res = tryToFoldExtendOfConstant(N, DAG, LegalOperations))
> + return SDValue(Res, 0);
> +
> // fold (zext (zext x)) -> (zext x)
> // fold (zext (aext x)) -> (zext x)
> if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() ==
> ISD::ANY_EXTEND) @@ -5186,9 +5241,9 @@ SDValue
> DAGCombiner::visitANY_EXTEND(SDN
> SDValue N0 = N->getOperand(0);
> EVT VT = N->getValueType(0);
>
> - // fold (aext c1) -> c1
> - if (isa<ConstantSDNode>(N0))
> - return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, N0);
> + if (SDNode *Res = tryToFoldExtendOfConstant(N, DAG, LegalOperations))
> + return SDValue(Res, 0);
> +
> // fold (aext (aext x)) -> (aext x)
> // fold (aext (zext x)) -> (zext x)
> // fold (aext (sext x)) -> (sext x)
>
> Modified: llvm/trunk/test/CodeGen/Mips/msa/compare_float.ll
> URL: http://llvm.org/viewvc/llvm-
> project/llvm/trunk/test/CodeGen/Mips/msa/compare_float.ll?rev=200234&
> r1=200233&r2=200234&view=diff
> ==========================================================
> ====================
> --- llvm/trunk/test/CodeGen/Mips/msa/compare_float.ll (original)
> +++ llvm/trunk/test/CodeGen/Mips/msa/compare_float.ll Mon Jan 27
> +++ 12:45:30 2014
> @@ -32,12 +32,9 @@ define void @false_v2f64(<2 x i64>* %c,
> store <2 x i64> %4, <2 x i64>* %c
> ret void
>
> - ; FIXME: This code is correct, but poor. Ideally it would be similar to
> - ; the code in @false_v4f32
> + ; (setcc $a, $b, SETFALSE) is always folded
> ; CHECK-DAG: ldi.b [[R1:\$w[0-9]+]], 0
> - ; CHECK-DAG: slli.d [[R3:\$w[0-9]+]], [[R1]], 63
> - ; CHECK-DAG: srai.d [[R4:\$w[0-9]+]], [[R3]], 63
> - ; CHECK-DAG: st.d [[R4]], 0($4)
> + ; CHECK-DAG: st.w [[R1]], 0($4)
> ; CHECK: .size false_v2f64
> }
>
> @@ -509,12 +506,9 @@ define void @true_v2f64(<2 x i64>* %c, <
> store <2 x i64> %4, <2 x i64>* %c
> ret void
>
> - ; FIXME: This code is correct, but poor. Ideally it would be similar to
> - ; the code in @true_v4f32
> - ; CHECK-DAG: ldi.d [[R1:\$w[0-9]+]], 1
> - ; CHECK-DAG: slli.d [[R3:\$w[0-9]+]], [[R1]], 63
> - ; CHECK-DAG: srai.d [[R4:\$w[0-9]+]], [[R3]], 63
> - ; CHECK-DAG: st.d [[R4]], 0($4)
> + ; (setcc $a, $b, SETTRUE) is always folded.
> +  ; CHECK-DAG: ldi.b [[R1:\$w[0-9]+]], -1
> +  ; CHECK-DAG: st.w [[R1]], 0($4)
> ; CHECK: .size true_v2f64
> }
>
>
> Modified: llvm/trunk/test/CodeGen/X86/avx-blend.ll
> URL: http://llvm.org/viewvc/llvm-
> project/llvm/trunk/test/CodeGen/X86/avx-
> blend.ll?rev=200234&r1=200233&r2=200234&view=diff
> ==========================================================
> ====================
> --- llvm/trunk/test/CodeGen/X86/avx-blend.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/avx-blend.ll Mon Jan 27 12:45:30 2014
> @@ -51,6 +51,7 @@ define <16 x i8> @vsel_i8(<16 x i8> %v1,
>
>
> ;CHECK-LABEL: vsel_float8:
> +;CHECK-NOT: vinsertf128
> ;CHECK: vblendvps
> ;CHECK: ret
> define <8 x float> @vsel_float8(<8 x float> %v1, <8 x float> %v2) { @@ -59,8
> +60,9 @@ define <8 x float> @vsel_float8(<8 x flo }
>
> ;CHECK-LABEL: vsel_i328:
> +;CHECK-NOT: vinsertf128
> ;CHECK: vblendvps
> -;CHECK: ret
> +;CHECK-NEXT: ret
> define <8 x i32> @vsel_i328(<8 x i32> %v1, <8 x i32> %v2) {
> %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1
> false, i1 false>, <8 x i32> %v1, <8 x i32> %v2
> ret <8 x i32> %vsel
> @@ -82,6 +84,15 @@ define <8 x i64> @vsel_i648(<8 x i64> %v
> ret <8 x i64> %vsel
> }
>
> +;CHECK-LABEL: vsel_double4:
> +;CHECK-NOT: vinsertf128
> +;CHECK: vblendvpd
> +;CHECK-NEXT: ret
> +define <4 x double> @vsel_double4(<4 x double> %v1, <4 x double> %v2) {
> + %vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x
> +double> %v1, <4 x double> %v2
> + ret <4 x double> %vsel
> +}
> +
> ;; TEST blend + compares
> ; CHECK: testa
> define <2 x double> @testa(<2 x double> %x, <2 x double> %y) {
>
> Added: llvm/trunk/test/CodeGen/X86/fold-vector-sext-zext.ll
> URL: http://llvm.org/viewvc/llvm-
> project/llvm/trunk/test/CodeGen/X86/fold-vector-sext-
> zext.ll?rev=200234&view=auto
> ==========================================================
> ====================
> --- llvm/trunk/test/CodeGen/X86/fold-vector-sext-zext.ll (added)
> +++ llvm/trunk/test/CodeGen/X86/fold-vector-sext-zext.ll Mon Jan 27
> +++ 12:45:30 2014
> @@ -0,0 +1,291 @@
> +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx
> +-mattr=+avx | FileCheck %s
> +
> +; Verify that the backend correctly folds a sign/zero extend of a vector where
> +; elements are all constant values or UNDEFs.
> +; The backend should be able to optimize all the test functions below into
> +; simple loads from constant pool of the result. That is because the resulting
> +; vector should be known at static time.
> +
> +
> +define <4 x i16> @test1() {
> + %1 = insertelement <4 x i8> undef, i8 0, i32 0
> + %2 = insertelement <4 x i8> %1, i8 -1, i32 1
> + %3 = insertelement <4 x i8> %2, i8 2, i32 2
> + %4 = insertelement <4 x i8> %3, i8 -3, i32 3
> + %5 = sext <4 x i8> %4 to <4 x i16>
> + ret <4 x i16> %5
> +}
> +; CHECK-LABEL: test1
> +; CHECK: vmovaps
> +; CHECK-NEXT: ret
> +
> +define <4 x i16> @test2() {
> + %1 = insertelement <4 x i8> undef, i8 undef, i32 0
> + %2 = insertelement <4 x i8> %1, i8 -1, i32 1
> + %3 = insertelement <4 x i8> %2, i8 undef, i32 2
> + %4 = insertelement <4 x i8> %3, i8 -3, i32 3
> + %5 = sext <4 x i8> %4 to <4 x i16>
> + ret <4 x i16> %5
> +}
> +; CHECK-LABEL: test2
> +; CHECK: vmovaps
> +; CHECK-NEXT: ret
> +
> +define <4 x i32> @test3() {
> + %1 = insertelement <4 x i8> undef, i8 0, i32 0
> + %2 = insertelement <4 x i8> %1, i8 -1, i32 1
> + %3 = insertelement <4 x i8> %2, i8 2, i32 2
> + %4 = insertelement <4 x i8> %3, i8 -3, i32 3
> + %5 = sext <4 x i8> %4 to <4 x i32>
> + ret <4 x i32> %5
> +}
> +; CHECK-LABEL: test3
> +; CHECK: vmovaps
> +; CHECK-NEXT: ret
> +
> +define <4 x i32> @test4() {
> + %1 = insertelement <4 x i8> undef, i8 undef, i32 0
> + %2 = insertelement <4 x i8> %1, i8 -1, i32 1
> + %3 = insertelement <4 x i8> %2, i8 undef, i32 2
> + %4 = insertelement <4 x i8> %3, i8 -3, i32 3
> + %5 = sext <4 x i8> %4 to <4 x i32>
> + ret <4 x i32> %5
> +}
> +; CHECK-LABEL: test4
> +; CHECK: vmovaps
> +; CHECK-NEXT: ret
> +
> +
> +define <4 x i64> @test5() {
> + %1 = insertelement <4 x i8> undef, i8 0, i32 0
> + %2 = insertelement <4 x i8> %1, i8 -1, i32 1
> + %3 = insertelement <4 x i8> %2, i8 2, i32 2
> + %4 = insertelement <4 x i8> %3, i8 -3, i32 3
> + %5 = sext <4 x i8> %4 to <4 x i64>
> + ret <4 x i64> %5
> +}
> +; CHECK-LABEL: test5
> +; CHECK-NOT: vinsertf128
> +; CHECK: vmovaps
> +; CHECK-NEXT: ret
> +
> +define <4 x i64> @test6() {
> + %1 = insertelement <4 x i8> undef, i8 undef, i32 0
> + %2 = insertelement <4 x i8> %1, i8 -1, i32 1
> + %3 = insertelement <4 x i8> %2, i8 undef, i32 2
> + %4 = insertelement <4 x i8> %3, i8 -3, i32 3
> + %5 = sext <4 x i8> %4 to <4 x i64>
> + ret <4 x i64> %5
> +}
> +; CHECK-LABEL: test6
> +; CHECK-NOT: vinsertf128
> +; CHECK: vmovaps
> +; CHECK-NEXT: ret
> +
> +define <8 x i16> @test7() {
> + %1 = insertelement <8 x i8> undef, i8 0, i32 0
> + %2 = insertelement <8 x i8> %1, i8 -1, i32 1
> + %3 = insertelement <8 x i8> %2, i8 2, i32 2
> + %4 = insertelement <8 x i8> %3, i8 -3, i32 3
> + %5 = insertelement <8 x i8> %4, i8 4, i32 4
> + %6 = insertelement <8 x i8> %5, i8 -5, i32 5
> + %7 = insertelement <8 x i8> %6, i8 6, i32 6
> + %8 = insertelement <8 x i8> %7, i8 -7, i32 7
> + %9 = sext <8 x i8> %4 to <8 x i16>
> + ret <8 x i16> %9
> +}
> +; CHECK-LABEL: test7
> +; CHECK: vmovaps
> +; CHECK-NEXT: ret
> +
> +define <8 x i32> @test8() {
> + %1 = insertelement <8 x i8> undef, i8 0, i32 0
> + %2 = insertelement <8 x i8> %1, i8 -1, i32 1
> + %3 = insertelement <8 x i8> %2, i8 2, i32 2
> + %4 = insertelement <8 x i8> %3, i8 -3, i32 3
> + %5 = insertelement <8 x i8> %4, i8 4, i32 4
> + %6 = insertelement <8 x i8> %5, i8 -5, i32 5
> + %7 = insertelement <8 x i8> %6, i8 6, i32 6
> + %8 = insertelement <8 x i8> %7, i8 -7, i32 7
> + %9 = sext <8 x i8> %4 to <8 x i32>
> + ret <8 x i32> %9
> +}
> +; CHECK-LABEL: test8
> +; CHECK-NOT: vinsertf128
> +; CHECK: vmovaps
> +; CHECK-NEXT: ret
> +
> +define <8 x i16> @test9() {
> + %1 = insertelement <8 x i8> undef, i8 undef, i32 0
> + %2 = insertelement <8 x i8> %1, i8 -1, i32 1
> + %3 = insertelement <8 x i8> %2, i8 undef, i32 2
> + %4 = insertelement <8 x i8> %3, i8 -3, i32 3
> + %5 = insertelement <8 x i8> %4, i8 undef, i32 4
> + %6 = insertelement <8 x i8> %5, i8 -5, i32 5
> + %7 = insertelement <8 x i8> %6, i8 undef, i32 6
> + %8 = insertelement <8 x i8> %7, i8 -7, i32 7
> + %9 = sext <8 x i8> %4 to <8 x i16>
> + ret <8 x i16> %9
> +}
> +; CHECK-LABEL: test9
> +; CHECK: vmovaps
> +; CHECK-NEXT: ret
> +
> +define <8 x i32> @test10() {
> + %1 = insertelement <8 x i8> undef, i8 0, i32 0
> + %2 = insertelement <8 x i8> %1, i8 undef, i32 1
> + %3 = insertelement <8 x i8> %2, i8 2, i32 2
> + %4 = insertelement <8 x i8> %3, i8 undef, i32 3
> + %5 = insertelement <8 x i8> %4, i8 4, i32 4
> + %6 = insertelement <8 x i8> %5, i8 undef, i32 5
> + %7 = insertelement <8 x i8> %6, i8 6, i32 6
> + %8 = insertelement <8 x i8> %7, i8 undef, i32 7
> + %9 = sext <8 x i8> %4 to <8 x i32>
> + ret <8 x i32> %9
> +}
> +; CHECK-LABEL: test10
> +; CHECK-NOT: vinsertf128
> +; CHECK: vmovaps
> +; CHECK-NEXT: ret
> +
> +
> +define <4 x i16> @test11() {
> + %1 = insertelement <4 x i8> undef, i8 0, i32 0
> + %2 = insertelement <4 x i8> %1, i8 -1, i32 1
> + %3 = insertelement <4 x i8> %2, i8 2, i32 2
> + %4 = insertelement <4 x i8> %3, i8 -3, i32 3
> + %5 = zext <4 x i8> %4 to <4 x i16>
> + ret <4 x i16> %5
> +}
> +; CHECK-LABEL: test11
> +; CHECK: vmovaps
> +; CHECK-NEXT: ret
> +
> +define <4 x i32> @test12() {
> + %1 = insertelement <4 x i8> undef, i8 0, i32 0
> + %2 = insertelement <4 x i8> %1, i8 -1, i32 1
> + %3 = insertelement <4 x i8> %2, i8 2, i32 2
> + %4 = insertelement <4 x i8> %3, i8 -3, i32 3
> + %5 = zext <4 x i8> %4 to <4 x i32>
> + ret <4 x i32> %5
> +}
> +; CHECK-LABEL: test12
> +; CHECK: vmovaps
> +; CHECK-NEXT: ret
> +
> +define <4 x i64> @test13() {
> + %1 = insertelement <4 x i8> undef, i8 0, i32 0
> + %2 = insertelement <4 x i8> %1, i8 -1, i32 1
> + %3 = insertelement <4 x i8> %2, i8 2, i32 2
> + %4 = insertelement <4 x i8> %3, i8 -3, i32 3
> + %5 = zext <4 x i8> %4 to <4 x i64>
> + ret <4 x i64> %5
> +}
> +; CHECK-LABEL: test13
> +; CHECK-NOT: vinsertf128
> +; CHECK: vmovaps
> +; CHECK-NEXT: ret
> +
> +define <4 x i16> @test14() {
> + %1 = insertelement <4 x i8> undef, i8 undef, i32 0
> + %2 = insertelement <4 x i8> %1, i8 -1, i32 1
> + %3 = insertelement <4 x i8> %2, i8 undef, i32 2
> + %4 = insertelement <4 x i8> %3, i8 -3, i32 3
> + %5 = zext <4 x i8> %4 to <4 x i16>
> + ret <4 x i16> %5
> +}
> +; CHECK-LABEL: test14
> +; CHECK: vmovaps
> +; CHECK-NEXT: ret
> +
> +define <4 x i32> @test15() {
> + %1 = insertelement <4 x i8> undef, i8 0, i32 0
> + %2 = insertelement <4 x i8> %1, i8 undef, i32 1
> + %3 = insertelement <4 x i8> %2, i8 2, i32 2
> + %4 = insertelement <4 x i8> %3, i8 undef, i32 3
> + %5 = zext <4 x i8> %4 to <4 x i32>
> + ret <4 x i32> %5
> +}
> +; CHECK-LABEL: test15
> +; CHECK: vmovaps
> +; CHECK-NEXT: ret
> +
> +define <4 x i64> @test16() {
> + %1 = insertelement <4 x i8> undef, i8 undef, i32 0
> + %2 = insertelement <4 x i8> %1, i8 -1, i32 1
> + %3 = insertelement <4 x i8> %2, i8 2, i32 2
> + %4 = insertelement <4 x i8> %3, i8 undef, i32 3
> + %5 = zext <4 x i8> %4 to <4 x i64>
> + ret <4 x i64> %5
> +}
> +; CHECK-LABEL: test16
> +; CHECK-NOT: vinsertf128
> +; CHECK: vmovaps
> +; CHECK-NEXT: ret
> +
> +define <8 x i16> @test17() {
> + %1 = insertelement <8 x i8> undef, i8 0, i32 0
> + %2 = insertelement <8 x i8> %1, i8 -1, i32 1
> + %3 = insertelement <8 x i8> %2, i8 2, i32 2
> + %4 = insertelement <8 x i8> %3, i8 -3, i32 3
> + %5 = insertelement <8 x i8> %4, i8 4, i32 4
> + %6 = insertelement <8 x i8> %5, i8 -5, i32 5
> + %7 = insertelement <8 x i8> %6, i8 6, i32 6
> + %8 = insertelement <8 x i8> %7, i8 -7, i32 7
> + %9 = zext <8 x i8> %8 to <8 x i16>
> + ret <8 x i16> %9
> +}
> +; CHECK-LABEL: test17
> +; CHECK: vmovaps
> +; CHECK-NEXT: ret
> +
> +define <8 x i32> @test18() {
> + %1 = insertelement <8 x i8> undef, i8 0, i32 0
> + %2 = insertelement <8 x i8> %1, i8 -1, i32 1
> + %3 = insertelement <8 x i8> %2, i8 2, i32 2
> + %4 = insertelement <8 x i8> %3, i8 -3, i32 3
> + %5 = insertelement <8 x i8> %4, i8 4, i32 4
> + %6 = insertelement <8 x i8> %5, i8 -5, i32 5
> + %7 = insertelement <8 x i8> %6, i8 6, i32 6
> + %8 = insertelement <8 x i8> %7, i8 -7, i32 7
> + %9 = zext <8 x i8> %8 to <8 x i32>
> + ret <8 x i32> %9
> +}
> +; CHECK-LABEL: test18
> +; CHECK-NOT: vinsertf128
> +; CHECK: vmovaps
> +; CHECK-NEXT: ret
> +
> +define <8 x i16> @test19() {
> + %1 = insertelement <8 x i8> undef, i8 undef, i32 0
> + %2 = insertelement <8 x i8> %1, i8 -1, i32 1
> + %3 = insertelement <8 x i8> %2, i8 undef, i32 2
> + %4 = insertelement <8 x i8> %3, i8 -3, i32 3
> + %5 = insertelement <8 x i8> %4, i8 undef, i32 4
> + %6 = insertelement <8 x i8> %5, i8 -5, i32 5
> + %7 = insertelement <8 x i8> %6, i8 undef, i32 6
> + %8 = insertelement <8 x i8> %7, i8 -7, i32 7
> + %9 = zext <8 x i8> %8 to <8 x i16>
> + ret <8 x i16> %9
> +}
> +; CHECK-LABEL: test19
> +; CHECK: vmovaps
> +; CHECK-NEXT: ret
> +
> +define <8 x i32> @test20() {
> + %1 = insertelement <8 x i8> undef, i8 0, i32 0
> + %2 = insertelement <8 x i8> %1, i8 undef, i32 1
> + %3 = insertelement <8 x i8> %2, i8 2, i32 2
> + %4 = insertelement <8 x i8> %3, i8 -3, i32 3
> + %5 = insertelement <8 x i8> %4, i8 4, i32 4
> + %6 = insertelement <8 x i8> %5, i8 undef, i32 5
> + %7 = insertelement <8 x i8> %6, i8 6, i32 6
> + %8 = insertelement <8 x i8> %7, i8 undef, i32 7
> + %9 = zext <8 x i8> %8 to <8 x i32>
> + ret <8 x i32> %9
> +}
> +; CHECK-LABEL: test20
> +; CHECK-NOT: vinsertf128
> +; CHECK: vmovaps
> +; CHECK-NEXT: ret
> +
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>
> To report this email as SPAM, please forward it to spam at websense.com
More information about the llvm-commits
mailing list