[llvm] r304072 - [DAGCombiner] use narrow load to avoid vector extract
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Sat May 27 07:07:04 PDT 2017
Author: spatel
Date: Sat May 27 09:07:03 2017
New Revision: 304072
URL: http://llvm.org/viewvc/llvm-project?rev=304072&view=rev
Log:
[DAGCombiner] use narrow load to avoid vector extract
If we have (extract_subvector (load wide vector)) and the load has no other
users, that can just be (load narrow vector). This is intentionally conservative.
Follow-ups may loosen the one-use constraint to account for the extract cost
or just remove the one-use check.
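For reference, a minimal IR sketch of the pattern (an illustrative example
in the style of the tests updated below; the function name is hypothetical):

define <2 x i32> @extract_high(<4 x i32>* %p) {
  %wide = load <4 x i32>, <4 x i32>* %p
  %hi = shufflevector <4 x i32> %wide, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  ret <2 x i32> %hi
}

The shuffle selects elements 2 and 3, so the 16-byte load plus extract can
become a single 8-byte load from %p at byte offset 8 (element index 2 times
4 bytes per element).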
The memop chain updating is based on code that already exists in multiple
places in x86 lowering, so it should be pulled into a helper function as a follow-up.
Background: this is a potential improvement noticed via regressions caused by
making x86's peekThroughBitcasts() not loop on consecutive bitcasts (see
comments in D33137).
Differential Revision: https://reviews.llvm.org/D33578
Modified:
llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/trunk/test/CodeGen/AArch64/arm64-vabs.ll
llvm/trunk/test/CodeGen/AArch64/arm64-vadd.ll
llvm/trunk/test/CodeGen/AArch64/arm64-vmul.ll
llvm/trunk/test/CodeGen/AArch64/arm64-vshift.ll
llvm/trunk/test/CodeGen/AArch64/arm64-vsub.ll
llvm/trunk/test/CodeGen/ARM/vcombine.ll
llvm/trunk/test/CodeGen/ARM/vext.ll
llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v16.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-avx512.ll
llvm/trunk/test/CodeGen/X86/widened-broadcast.ll
llvm/trunk/test/CodeGen/X86/x86-interleaved-access.ll
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=304072&r1=304071&r2=304072&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Sat May 27 09:07:03 2017
@@ -14555,6 +14555,52 @@ static SDValue narrowExtractedVectorBinO
return DAG.getBitcast(VT, NarrowBinOp);
}
+/// If we are extracting a subvector from a wide vector load, convert to a
+/// narrow load to eliminate the extraction:
+/// (extract_subvector (load wide vector)) --> (load narrow vector)
+static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
+ // TODO: Add support for big-endian. The offset calculation must be adjusted.
+ if (DAG.getDataLayout().isBigEndian())
+ return SDValue();
+
+ // TODO: The one-use check is overly conservative. Check the cost of the
+ // extract instead or remove that condition entirely.
+ auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
+ auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
+ if (!Ld || !Ld->hasOneUse() || Ld->isVolatile() || !ExtIdx)
+ return SDValue();
+
+ // The narrow load will be offset from the base address of the old load if
+ // we are extracting from something besides index 0 (little-endian).
+ EVT VT = Extract->getValueType(0);
+ SDLoc DL(Extract);
+ SDValue BaseAddr = Ld->getOperand(1);
+ unsigned Offset = ExtIdx->getZExtValue() * VT.getScalarType().getStoreSize();
+
+ // TODO: Use "BaseIndexOffset" to make this more effective.
+ SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL);
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineMemOperand *MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset,
+ VT.getStoreSize());
+ SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
+
+ // The new load must have the same position as the old load in terms of memory
+ // dependency. Create a TokenFactor for Ld and NewLd and update uses of Ld's
+ // output chain to use that TokenFactor.
+ // TODO: This code is based on a similar sequence in x86 lowering. It should
+ // be moved to a helper function, so it can be shared and reused.
+ if (Ld->hasAnyUseOfValue(1)) {
+ SDValue OldChain = SDValue(Ld, 1);
+ SDValue NewChain = SDValue(NewLd.getNode(), 1);
+ SDValue TokenFactor = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
+ OldChain, NewChain);
+ DAG.ReplaceAllUsesOfValueWith(OldChain, TokenFactor);
+ DAG.UpdateNodeOperands(TokenFactor.getNode(), OldChain, NewChain);
+ }
+
+ return NewLd;
+}
+
SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
EVT NVT = N->getValueType(0);
SDValue V = N->getOperand(0);
@@ -14563,6 +14609,10 @@ SDValue DAGCombiner::visitEXTRACT_SUBVEC
if (V.isUndef())
return DAG.getUNDEF(NVT);
+ if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT))
+ if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
+ return NarrowLoad;
+
// Combine:
// (extract_subvec (concat V1, V2, ...), i)
// Into:
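To make the one-use guard in narrowExtractedVectorLoad() concrete, here is
a sketch of a case it intentionally rejects (a hypothetical example,
assuming the check behaves as written above): the wide load has a second
user, so narrowing would not eliminate it and the combine bails out.

define <2 x i32> @not_narrowed(<4 x i32>* %p, <4 x i32>* %q) {
  %wide = load <4 x i32>, <4 x i32>* %p
  ; The store is a second user of %wide, so Ld->hasOneUse() fails and the
  ; extract below is left for the target to lower.
  store <4 x i32> %wide, <4 x i32>* %q
  %hi = shufflevector <4 x i32> %wide, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  ret <2 x i32> %hi
}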
Modified: llvm/trunk/test/CodeGen/AArch64/arm64-vabs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-vabs.ll?rev=304072&r1=304071&r2=304072&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/arm64-vabs.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/arm64-vabs.ll Sat May 27 09:07:03 2017
@@ -33,7 +33,7 @@ define <2 x i64> @sabdl2d(<2 x i32>* %A,
define <8 x i16> @sabdl2_8h(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: sabdl2_8h:
-;CHECK: sabdl2.8h
+;CHECK: sabdl.8h
%load1 = load <16 x i8>, <16 x i8>* %A
%load2 = load <16 x i8>, <16 x i8>* %B
%tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -45,7 +45,7 @@ define <8 x i16> @sabdl2_8h(<16 x i8>* %
define <4 x i32> @sabdl2_4s(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: sabdl2_4s:
-;CHECK: sabdl2.4s
+;CHECK: sabdl.4s
%load1 = load <8 x i16>, <8 x i16>* %A
%load2 = load <8 x i16>, <8 x i16>* %B
%tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
@@ -57,7 +57,7 @@ define <4 x i32> @sabdl2_4s(<8 x i16>* %
define <2 x i64> @sabdl2_2d(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: sabdl2_2d:
-;CHECK: sabdl2.2d
+;CHECK: sabdl.2d
%load1 = load <4 x i32>, <4 x i32>* %A
%load2 = load <4 x i32>, <4 x i32>* %B
%tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
@@ -99,7 +99,7 @@ define <2 x i64> @uabdl2d(<2 x i32>* %A,
define <8 x i16> @uabdl2_8h(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: uabdl2_8h:
-;CHECK: uabdl2.8h
+;CHECK: uabdl.8h
%load1 = load <16 x i8>, <16 x i8>* %A
%load2 = load <16 x i8>, <16 x i8>* %B
%tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -112,7 +112,7 @@ define <8 x i16> @uabdl2_8h(<16 x i8>* %
define <4 x i32> @uabdl2_4s(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: uabdl2_4s:
-;CHECK: uabdl2.4s
+;CHECK: uabdl.4s
%load1 = load <8 x i16>, <8 x i16>* %A
%load2 = load <8 x i16>, <8 x i16>* %B
%tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
@@ -124,7 +124,7 @@ define <4 x i32> @uabdl2_4s(<8 x i16>* %
define <2 x i64> @uabdl2_2d(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: uabdl2_2d:
-;CHECK: uabdl2.2d
+;CHECK: uabdl.2d
%load1 = load <4 x i32>, <4 x i32>* %A
%load2 = load <4 x i32>, <4 x i32>* %B
%tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
@@ -561,7 +561,7 @@ define <2 x i64> @sabal2d(<2 x i32>* %A,
define <8 x i16> @sabal2_8h(<16 x i8>* %A, <16 x i8>* %B, <8 x i16>* %C) nounwind {
;CHECK-LABEL: sabal2_8h:
-;CHECK: sabal2.8h
+;CHECK: sabal.8h
%load1 = load <16 x i8>, <16 x i8>* %A
%load2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = load <8 x i16>, <8 x i16>* %C
@@ -575,7 +575,7 @@ define <8 x i16> @sabal2_8h(<16 x i8>* %
define <4 x i32> @sabal2_4s(<8 x i16>* %A, <8 x i16>* %B, <4 x i32>* %C) nounwind {
;CHECK-LABEL: sabal2_4s:
-;CHECK: sabal2.4s
+;CHECK: sabal.4s
%load1 = load <8 x i16>, <8 x i16>* %A
%load2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = load <4 x i32>, <4 x i32>* %C
@@ -589,7 +589,7 @@ define <4 x i32> @sabal2_4s(<8 x i16>* %
define <2 x i64> @sabal2_2d(<4 x i32>* %A, <4 x i32>* %B, <2 x i64>* %C) nounwind {
;CHECK-LABEL: sabal2_2d:
-;CHECK: sabal2.2d
+;CHECK: sabal.2d
%load1 = load <4 x i32>, <4 x i32>* %A
%load2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = load <2 x i64>, <2 x i64>* %C
@@ -639,7 +639,7 @@ define <2 x i64> @uabal2d(<2 x i32>* %A,
define <8 x i16> @uabal2_8h(<16 x i8>* %A, <16 x i8>* %B, <8 x i16>* %C) nounwind {
;CHECK-LABEL: uabal2_8h:
-;CHECK: uabal2.8h
+;CHECK: uabal.8h
%load1 = load <16 x i8>, <16 x i8>* %A
%load2 = load <16 x i8>, <16 x i8>* %B
%tmp3 = load <8 x i16>, <8 x i16>* %C
@@ -653,7 +653,7 @@ define <8 x i16> @uabal2_8h(<16 x i8>* %
define <4 x i32> @uabal2_4s(<8 x i16>* %A, <8 x i16>* %B, <4 x i32>* %C) nounwind {
;CHECK-LABEL: uabal2_4s:
-;CHECK: uabal2.4s
+;CHECK: uabal.4s
%load1 = load <8 x i16>, <8 x i16>* %A
%load2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = load <4 x i32>, <4 x i32>* %C
@@ -667,7 +667,7 @@ define <4 x i32> @uabal2_4s(<8 x i16>* %
define <2 x i64> @uabal2_2d(<4 x i32>* %A, <4 x i32>* %B, <2 x i64>* %C) nounwind {
;CHECK-LABEL: uabal2_2d:
-;CHECK: uabal2.2d
+;CHECK: uabal.2d
%load1 = load <4 x i32>, <4 x i32>* %A
%load2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = load <2 x i64>, <2 x i64>* %C
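The AArch64 test updates all follow one pattern: the "2" forms (sabdl2,
uabdl2, sabal2, etc.) read their sources from the high half of a 128-bit
register, and once the high half is loaded directly as a 64-bit value, the
base form applies. An approximate before/after for sabdl2_8h (a sketch,
not verbatim codegen output; register assignment may differ):

; before:  ldr q0, [x0]        ldr q1, [x1]        sabdl2.8h v0, v0, v1
; after:   ldr d0, [x0, #8]    ldr d1, [x1, #8]    sabdl.8h  v0, v0, v1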
Modified: llvm/trunk/test/CodeGen/AArch64/arm64-vadd.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-vadd.ll?rev=304072&r1=304071&r2=304072&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/arm64-vadd.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/arm64-vadd.ll Sat May 27 09:07:03 2017
@@ -318,7 +318,7 @@ define <2 x i64> @uaddw2d(<2 x i64>* %A,
define <8 x i16> @uaddw2_8h(<8 x i16>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: uaddw2_8h:
-;CHECK: uaddw2.8h
+;CHECK: uaddw.8h
%tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp2 = load <16 x i8>, <16 x i8>* %B
@@ -331,7 +331,7 @@ define <8 x i16> @uaddw2_8h(<8 x i16>* %
define <4 x i32> @uaddw2_4s(<4 x i32>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: uaddw2_4s:
-;CHECK: uaddw2.4s
+;CHECK: uaddw.4s
%tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp2 = load <8 x i16>, <8 x i16>* %B
@@ -344,7 +344,7 @@ define <4 x i32> @uaddw2_4s(<4 x i32>* %
define <2 x i64> @uaddw2_2d(<2 x i64>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: uaddw2_2d:
-;CHECK: uaddw2.2d
+;CHECK: uaddw.2d
%tmp1 = load <2 x i64>, <2 x i64>* %A
%tmp2 = load <4 x i32>, <4 x i32>* %B
@@ -387,7 +387,7 @@ define <2 x i64> @saddw2d(<2 x i64>* %A,
define <8 x i16> @saddw2_8h(<8 x i16>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: saddw2_8h:
-;CHECK: saddw2.8h
+;CHECK: saddw.8h
%tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp2 = load <16 x i8>, <16 x i8>* %B
@@ -400,7 +400,7 @@ define <8 x i16> @saddw2_8h(<8 x i16>* %
define <4 x i32> @saddw2_4s(<4 x i32>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: saddw2_4s:
-;CHECK: saddw2.4s
+;CHECK: saddw.4s
%tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp2 = load <8 x i16>, <8 x i16>* %B
@@ -413,7 +413,7 @@ define <4 x i32> @saddw2_4s(<4 x i32>* %
define <2 x i64> @saddw2_2d(<2 x i64>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: saddw2_2d:
-;CHECK: saddw2.2d
+;CHECK: saddw.2d
%tmp1 = load <2 x i64>, <2 x i64>* %A
%tmp2 = load <4 x i32>, <4 x i32>* %B
Modified: llvm/trunk/test/CodeGen/AArch64/arm64-vmul.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-vmul.ll?rev=304072&r1=304071&r2=304072&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/arm64-vmul.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/arm64-vmul.ll Sat May 27 09:07:03 2017
@@ -83,7 +83,7 @@ define <2 x i64> @sqdmull2d(<2 x i32>* %
define <4 x i32> @sqdmull2_4s(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: sqdmull2_4s:
-;CHECK: sqdmull2.4s
+;CHECK: sqdmull.4s
%load1 = load <8 x i16>, <8 x i16>* %A
%load2 = load <8 x i16>, <8 x i16>* %B
%tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
@@ -94,7 +94,7 @@ define <4 x i32> @sqdmull2_4s(<8 x i16>*
define <2 x i64> @sqdmull2_2d(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: sqdmull2_2d:
-;CHECK: sqdmull2.2d
+;CHECK: sqdmull.2d
%load1 = load <4 x i32>, <4 x i32>* %A
%load2 = load <4 x i32>, <4 x i32>* %B
%tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
@@ -324,7 +324,7 @@ define <2 x i64> @sqdmlal2d(<2 x i32>* %
define <4 x i32> @sqdmlal2_4s(<8 x i16>* %A, <8 x i16>* %B, <4 x i32>* %C) nounwind {
;CHECK-LABEL: sqdmlal2_4s:
-;CHECK: sqdmlal2.4s
+;CHECK: sqdmlal.4s
%load1 = load <8 x i16>, <8 x i16>* %A
%load2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = load <4 x i32>, <4 x i32>* %C
@@ -337,7 +337,7 @@ define <4 x i32> @sqdmlal2_4s(<8 x i16>*
define <2 x i64> @sqdmlal2_2d(<4 x i32>* %A, <4 x i32>* %B, <2 x i64>* %C) nounwind {
;CHECK-LABEL: sqdmlal2_2d:
-;CHECK: sqdmlal2.2d
+;CHECK: sqdmlal.2d
%load1 = load <4 x i32>, <4 x i32>* %A
%load2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = load <2 x i64>, <2 x i64>* %C
@@ -372,7 +372,7 @@ define <2 x i64> @sqdmlsl2d(<2 x i32>* %
define <4 x i32> @sqdmlsl2_4s(<8 x i16>* %A, <8 x i16>* %B, <4 x i32>* %C) nounwind {
;CHECK-LABEL: sqdmlsl2_4s:
-;CHECK: sqdmlsl2.4s
+;CHECK: sqdmlsl.4s
%load1 = load <8 x i16>, <8 x i16>* %A
%load2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = load <4 x i32>, <4 x i32>* %C
@@ -385,7 +385,7 @@ define <4 x i32> @sqdmlsl2_4s(<8 x i16>*
define <2 x i64> @sqdmlsl2_2d(<4 x i32>* %A, <4 x i32>* %B, <2 x i64>* %C) nounwind {
;CHECK-LABEL: sqdmlsl2_2d:
-;CHECK: sqdmlsl2.2d
+;CHECK: sqdmlsl.2d
%load1 = load <4 x i32>, <4 x i32>* %A
%load2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = load <2 x i64>, <2 x i64>* %C
@@ -874,7 +874,7 @@ define <2 x i64> @sqdmull_lane_2d(<2 x i
define <4 x i32> @sqdmull2_lane_4s(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: sqdmull2_lane_4s:
;CHECK-NOT: dup
-;CHECK: sqdmull2.4s
+;CHECK: sqdmull.4s
%load1 = load <8 x i16>, <8 x i16>* %A
%load2 = load <8 x i16>, <8 x i16>* %B
%tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
@@ -886,7 +886,7 @@ define <4 x i32> @sqdmull2_lane_4s(<8 x
define <2 x i64> @sqdmull2_lane_2d(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: sqdmull2_lane_2d:
;CHECK-NOT: dup
-;CHECK: sqdmull2.2d
+;CHECK: sqdmull.2d
%load1 = load <4 x i32>, <4 x i32>* %A
%load2 = load <4 x i32>, <4 x i32>* %B
%tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
@@ -994,7 +994,7 @@ define <2 x i64> @sqdmlal_lane_2d(<2 x i
define <4 x i32> @sqdmlal2_lane_4s(<8 x i16>* %A, <8 x i16>* %B, <4 x i32>* %C) nounwind {
;CHECK-LABEL: sqdmlal2_lane_4s:
;CHECK-NOT: dup
-;CHECK: sqdmlal2.4s
+;CHECK: sqdmlal.4s
%load1 = load <8 x i16>, <8 x i16>* %A
%load2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = load <4 x i32>, <4 x i32>* %C
@@ -1008,7 +1008,7 @@ define <4 x i32> @sqdmlal2_lane_4s(<8 x
define <2 x i64> @sqdmlal2_lane_2d(<4 x i32>* %A, <4 x i32>* %B, <2 x i64>* %C) nounwind {
;CHECK-LABEL: sqdmlal2_lane_2d:
;CHECK-NOT: dup
-;CHECK: sqdmlal2.2d
+;CHECK: sqdmlal.2d
%load1 = load <4 x i32>, <4 x i32>* %A
%load2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = load <2 x i64>, <2 x i64>* %C
@@ -1147,7 +1147,7 @@ define <2 x i64> @sqdmlsl_lane_2d(<2 x i
define <4 x i32> @sqdmlsl2_lane_4s(<8 x i16>* %A, <8 x i16>* %B, <4 x i32>* %C) nounwind {
;CHECK-LABEL: sqdmlsl2_lane_4s:
;CHECK-NOT: dup
-;CHECK: sqdmlsl2.4s
+;CHECK: sqdmlsl.4s
%load1 = load <8 x i16>, <8 x i16>* %A
%load2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = load <4 x i32>, <4 x i32>* %C
@@ -1161,7 +1161,7 @@ define <4 x i32> @sqdmlsl2_lane_4s(<8 x
define <2 x i64> @sqdmlsl2_lane_2d(<4 x i32>* %A, <4 x i32>* %B, <2 x i64>* %C) nounwind {
;CHECK-LABEL: sqdmlsl2_lane_2d:
;CHECK-NOT: dup
-;CHECK: sqdmlsl2.2d
+;CHECK: sqdmlsl.2d
%load1 = load <4 x i32>, <4 x i32>* %A
%load2 = load <4 x i32>, <4 x i32>* %B
%tmp3 = load <2 x i64>, <2 x i64>* %C
Modified: llvm/trunk/test/CodeGen/AArch64/arm64-vshift.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-vshift.ll?rev=304072&r1=304071&r2=304072&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/arm64-vshift.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/arm64-vshift.ll Sat May 27 09:07:03 2017
@@ -1164,7 +1164,7 @@ define <2 x i64> @ushll2d(<2 x i32>* %A)
define <8 x i16> @ushll2_8h(<16 x i8>* %A) nounwind {
;CHECK-LABEL: ushll2_8h:
-;CHECK: ushll2.8h v0, {{v[0-9]+}}, #1
+;CHECK: ushll.8h v0, {{v[0-9]+}}, #1
%load1 = load <16 x i8>, <16 x i8>* %A
%tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%tmp2 = zext <8 x i8> %tmp1 to <8 x i16>
@@ -1174,7 +1174,7 @@ define <8 x i16> @ushll2_8h(<16 x i8>* %
define <4 x i32> @ushll2_4s(<8 x i16>* %A) nounwind {
;CHECK-LABEL: ushll2_4s:
-;CHECK: ushll2.4s v0, {{v[0-9]+}}, #1
+;CHECK: ushll.4s v0, {{v[0-9]+}}, #1
%load1 = load <8 x i16>, <8 x i16>* %A
%tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%tmp2 = zext <4 x i16> %tmp1 to <4 x i32>
@@ -1184,7 +1184,7 @@ define <4 x i32> @ushll2_4s(<8 x i16>* %
define <2 x i64> @ushll2_2d(<4 x i32>* %A) nounwind {
;CHECK-LABEL: ushll2_2d:
-;CHECK: ushll2.2d v0, {{v[0-9]+}}, #1
+;CHECK: ushll.2d v0, {{v[0-9]+}}, #1
%load1 = load <4 x i32>, <4 x i32>* %A
%tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%tmp2 = zext <2 x i32> %tmp1 to <2 x i64>
@@ -1221,7 +1221,7 @@ define <2 x i64> @sshll2d(<2 x i32>* %A)
define <8 x i16> @sshll2_8h(<16 x i8>* %A) nounwind {
;CHECK-LABEL: sshll2_8h:
-;CHECK: sshll2.8h v0, {{v[0-9]+}}, #1
+;CHECK: sshll.8h v0, {{v[0-9]+}}, #1
%load1 = load <16 x i8>, <16 x i8>* %A
%tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%tmp2 = sext <8 x i8> %tmp1 to <8 x i16>
@@ -1231,7 +1231,7 @@ define <8 x i16> @sshll2_8h(<16 x i8>* %
define <4 x i32> @sshll2_4s(<8 x i16>* %A) nounwind {
;CHECK-LABEL: sshll2_4s:
-;CHECK: sshll2.4s v0, {{v[0-9]+}}, #1
+;CHECK: sshll.4s v0, {{v[0-9]+}}, #1
%load1 = load <8 x i16>, <8 x i16>* %A
%tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%tmp2 = sext <4 x i16> %tmp1 to <4 x i32>
@@ -1241,7 +1241,7 @@ define <4 x i32> @sshll2_4s(<8 x i16>* %
define <2 x i64> @sshll2_2d(<4 x i32>* %A) nounwind {
;CHECK-LABEL: sshll2_2d:
-;CHECK: sshll2.2d v0, {{v[0-9]+}}, #1
+;CHECK: sshll.2d v0, {{v[0-9]+}}, #1
%load1 = load <4 x i32>, <4 x i32>* %A
%tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%tmp2 = sext <2 x i32> %tmp1 to <2 x i64>
Modified: llvm/trunk/test/CodeGen/AArch64/arm64-vsub.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-vsub.ll?rev=304072&r1=304071&r2=304072&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/arm64-vsub.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/arm64-vsub.ll Sat May 27 09:07:03 2017
@@ -157,7 +157,7 @@ define <2 x i64> @ssubl2d(<2 x i32>* %A,
define <8 x i16> @ssubl2_8h(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: ssubl2_8h:
-;CHECK: ssubl2.8h
+;CHECK: ssubl.8h
%tmp1 = load <16 x i8>, <16 x i8>* %A
%high1 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%ext1 = sext <8 x i8> %high1 to <8 x i16>
@@ -172,7 +172,7 @@ define <8 x i16> @ssubl2_8h(<16 x i8>* %
define <4 x i32> @ssubl2_4s(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: ssubl2_4s:
-;CHECK: ssubl2.4s
+;CHECK: ssubl.4s
%tmp1 = load <8 x i16>, <8 x i16>* %A
%high1 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%ext1 = sext <4 x i16> %high1 to <4 x i32>
@@ -187,7 +187,7 @@ define <4 x i32> @ssubl2_4s(<8 x i16>* %
define <2 x i64> @ssubl2_2d(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: ssubl2_2d:
-;CHECK: ssubl2.2d
+;CHECK: ssubl.2d
%tmp1 = load <4 x i32>, <4 x i32>* %A
%high1 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%ext1 = sext <2 x i32> %high1 to <2 x i64>
@@ -235,7 +235,7 @@ define <2 x i64> @usubl2d(<2 x i32>* %A,
define <8 x i16> @usubl2_8h(<16 x i8>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: usubl2_8h:
-;CHECK: usubl2.8h
+;CHECK: usubl.8h
%tmp1 = load <16 x i8>, <16 x i8>* %A
%high1 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%ext1 = zext <8 x i8> %high1 to <8 x i16>
@@ -250,7 +250,7 @@ define <8 x i16> @usubl2_8h(<16 x i8>* %
define <4 x i32> @usubl2_4s(<8 x i16>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: usubl2_4s:
-;CHECK: usubl2.4s
+;CHECK: usubl.4s
%tmp1 = load <8 x i16>, <8 x i16>* %A
%high1 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%ext1 = zext <4 x i16> %high1 to <4 x i32>
@@ -265,7 +265,7 @@ define <4 x i32> @usubl2_4s(<8 x i16>* %
define <2 x i64> @usubl2_2d(<4 x i32>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: usubl2_2d:
-;CHECK: usubl2.2d
+;CHECK: usubl.2d
%tmp1 = load <4 x i32>, <4 x i32>* %A
%high1 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%ext1 = zext <2 x i32> %high1 to <2 x i64>
@@ -310,7 +310,7 @@ define <2 x i64> @ssubw2d(<2 x i64>* %A,
define <8 x i16> @ssubw2_8h(<8 x i16>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: ssubw2_8h:
-;CHECK: ssubw2.8h
+;CHECK: ssubw.8h
%tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp2 = load <16 x i8>, <16 x i8>* %B
@@ -323,7 +323,7 @@ define <8 x i16> @ssubw2_8h(<8 x i16>* %
define <4 x i32> @ssubw2_4s(<4 x i32>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: ssubw2_4s:
-;CHECK: ssubw2.4s
+;CHECK: ssubw.4s
%tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp2 = load <8 x i16>, <8 x i16>* %B
@@ -336,7 +336,7 @@ define <4 x i32> @ssubw2_4s(<4 x i32>* %
define <2 x i64> @ssubw2_2d(<2 x i64>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: ssubw2_2d:
-;CHECK: ssubw2.2d
+;CHECK: ssubw.2d
%tmp1 = load <2 x i64>, <2 x i64>* %A
%tmp2 = load <4 x i32>, <4 x i32>* %B
@@ -379,7 +379,7 @@ define <2 x i64> @usubw2d(<2 x i64>* %A,
define <8 x i16> @usubw2_8h(<8 x i16>* %A, <16 x i8>* %B) nounwind {
;CHECK-LABEL: usubw2_8h:
-;CHECK: usubw2.8h
+;CHECK: usubw.8h
%tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp2 = load <16 x i8>, <16 x i8>* %B
@@ -392,7 +392,7 @@ define <8 x i16> @usubw2_8h(<8 x i16>* %
define <4 x i32> @usubw2_4s(<4 x i32>* %A, <8 x i16>* %B) nounwind {
;CHECK-LABEL: usubw2_4s:
-;CHECK: usubw2.4s
+;CHECK: usubw.4s
%tmp1 = load <4 x i32>, <4 x i32>* %A
%tmp2 = load <8 x i16>, <8 x i16>* %B
@@ -405,7 +405,7 @@ define <4 x i32> @usubw2_4s(<4 x i32>* %
define <2 x i64> @usubw2_2d(<2 x i64>* %A, <4 x i32>* %B) nounwind {
;CHECK-LABEL: usubw2_2d:
-;CHECK: usubw2.2d
+;CHECK: usubw.2d
%tmp1 = load <2 x i64>, <2 x i64>* %A
%tmp2 = load <4 x i32>, <4 x i32>* %B
Modified: llvm/trunk/test/CodeGen/ARM/vcombine.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vcombine.ll?rev=304072&r1=304071&r2=304072&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vcombine.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vcombine.ll Sat May 27 09:07:03 2017
@@ -99,7 +99,9 @@ define <4 x i16> @vget_low16(<8 x i16>*
define <8 x i8> @vget_high8(<16 x i8>* %A) nounwind {
; CHECK: vget_high8
; CHECK-NOT: vst
-; CHECK-LE: vmov r0, r1, d17
+; CHECK-LE-NOT: vld1.64 {d16, d17}, [r0]
+; CHECK-LE: vldr d16, [r0, #8]
+; CHECK-LE: vmov r0, r1, d16
; CHECK-BE: vmov r1, r0, d16
%tmp1 = load <16 x i8>, <16 x i8>* %A
%tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
Modified: llvm/trunk/test/CodeGen/ARM/vext.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vext.ll?rev=304072&r1=304071&r2=304072&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vext.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vext.ll Sat May 27 09:07:03 2017
@@ -199,10 +199,10 @@ define <4 x i16> @test_interleaved(<8 x
define <4 x i16> @test_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind {
; CHECK-LABEL: test_undef:
; CHECK: @ BB#0:
-; CHECK-NEXT: vld1.64 {d16, d17}, [r1]
-; CHECK-NEXT: vld1.64 {d18, d19}, [r0]
-; CHECK-NEXT: vzip.16 d19, d16
-; CHECK-NEXT: vmov r0, r1, d19
+; CHECK-NEXT: vldr d16, [r1]
+; CHECK-NEXT: vldr d17, [r0, #8]
+; CHECK-NEXT: vzip.16 d17, d16
+; CHECK-NEXT: vmov r0, r1, d17
; CHECK-NEXT: mov pc, lr
%tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp2 = load <8 x i16>, <8 x i16>* %B
Modified: llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll?rev=304072&r1=304071&r2=304072&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll Sat May 27 09:07:03 2017
@@ -2216,9 +2216,9 @@ define i32 @test_int_x86_avx512_kunpck_wd
;
; AVX512F-32-LABEL: test_int_x86_avx512_kunpck_wd:
; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
-; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
-; AVX512F-32-NEXT: kunpckwd %k1, %k0, %k0
+; AVX512F-32-NEXT: kmovw {{[0-9]+}}(%esp), %k0
+; AVX512F-32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: kunpckwd %k0, %k1, %k0
; AVX512F-32-NEXT: kmovd %k0, %eax
; AVX512F-32-NEXT: retl
%res = call i32 @llvm.x86.avx512.kunpck.wd(i32 %x0, i32 %x1)
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v16.ll?rev=304072&r1=304071&r2=304072&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v16.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v16.ll Sat May 27 09:07:03 2017
@@ -282,8 +282,7 @@ define <16 x i32> @shuffle_v16i32_0_1_2_
define <8 x float> @shuffle_v16f32_extract_256(float* %RET, float* %a) {
; ALL-LABEL: shuffle_v16f32_extract_256:
; ALL: # BB#0:
-; ALL-NEXT: vmovups (%rsi), %zmm0
-; ALL-NEXT: vextractf32x8 $1, %zmm0, %ymm0
+; ALL-NEXT: vmovups 32(%rsi), %ymm0
; ALL-NEXT: retq
%ptr_a = bitcast float* %a to <16 x float>*
%v_a = load <16 x float>, <16 x float>* %ptr_a, align 4
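This is the offset computation from narrowExtractedVectorLoad() in action:
the extracted <8 x float> starts at element index 8, so the byte offset is
8 * 4 = 32, and the 64-byte zmm load plus vextractf32x8 collapses into a
single 32-byte load from 32(%rsi).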
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-avx512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-avx512.ll?rev=304072&r1=304071&r2=304072&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-avx512.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-avx512.ll Sat May 27 09:07:03 2017
@@ -511,11 +511,10 @@ define <8 x float> @expand14(<4 x float>
;
; KNL64-LABEL: expand14:
; KNL64: # BB#0:
+; KNL64-NEXT: vpermilps {{.*#+}} xmm1 = mem[3,3,0,0]
+; KNL64-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,1,1,1]
; KNL64-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,1,3]
; KNL64-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,3]
-; KNL64-NEXT: vmovaps {{.*#+}} ymm1 = <0,2,4,0,u,u,u,u>
-; KNL64-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,3,0,0]
-; KNL64-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,1,1,1]
; KNL64-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2],ymm1[3],ymm0[4],ymm1[5,6,7]
; KNL64-NEXT: retq
;
@@ -529,11 +528,10 @@ define <8 x float> @expand14(<4 x float>
;
; KNL32-LABEL: expand14:
; KNL32: # BB#0:
+; KNL32-NEXT: vpermilps {{.*#+}} xmm1 = mem[3,3,0,0]
+; KNL32-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,1,1,1]
; KNL32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,1,3]
; KNL32-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,3]
-; KNL32-NEXT: vmovaps {{.*#+}} ymm1 = <0,2,4,0,u,u,u,u>
-; KNL32-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,3,0,0]
-; KNL32-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,1,1,1]
; KNL32-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2],ymm1[3],ymm0[4],ymm1[5,6,7]
; KNL32-NEXT: retl
%addV = fadd <4 x float> <float 0.0,float 1.0,float 2.0,float 0.0> , <float 0.0,float 1.0,float 2.0,float 0.0>
@@ -545,39 +543,35 @@ define <8 x float> @expand14(<4 x float>
define <8 x float> @expand15(<4 x float> %a) {
; SKX64-LABEL: expand15:
; SKX64: # BB#0:
-; SKX64-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,1,1,3]
-; SKX64-NEXT: vmovaps {{.*#+}} ymm0 = <0,2,4,0,u,u,u,u>
-; SKX64-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[0,1,0,0]
+; SKX64-NEXT: vpermilps {{.*#+}} xmm1 = mem[0,1,0,0]
+; SKX64-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[0,1,1,3]
; SKX64-NEXT: vmovaps {{.*#+}} ymm0 = [0,1,8,3,10,3,2,3]
-; SKX64-NEXT: vpermi2ps %ymm1, %ymm2, %ymm0
+; SKX64-NEXT: vpermi2ps %ymm2, %ymm1, %ymm0
; SKX64-NEXT: retq
;
; KNL64-LABEL: expand15:
; KNL64: # BB#0:
+; KNL64-NEXT: vpermilps {{.*#+}} xmm1 = mem[0,1,0,0]
+; KNL64-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,1,1,1]
; KNL64-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,1,3]
; KNL64-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,3]
-; KNL64-NEXT: vmovaps {{.*#+}} ymm1 = <0,2,4,0,u,u,u,u>
-; KNL64-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,1,0,0]
-; KNL64-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,1,1,1]
; KNL64-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2],ymm1[3],ymm0[4],ymm1[5,6,7]
; KNL64-NEXT: retq
;
; SKX32-LABEL: expand15:
; SKX32: # BB#0:
-; SKX32-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,1,1,3]
-; SKX32-NEXT: vmovaps {{.*#+}} ymm0 = <0,2,4,0,u,u,u,u>
-; SKX32-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[0,1,0,0]
+; SKX32-NEXT: vpermilps {{.*#+}} xmm1 = mem[0,1,0,0]
+; SKX32-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[0,1,1,3]
; SKX32-NEXT: vmovaps {{.*#+}} ymm0 = [0,1,8,3,10,3,2,3]
-; SKX32-NEXT: vpermi2ps %ymm1, %ymm2, %ymm0
+; SKX32-NEXT: vpermi2ps %ymm2, %ymm1, %ymm0
; SKX32-NEXT: retl
;
; KNL32-LABEL: expand15:
; KNL32: # BB#0:
+; KNL32-NEXT: vpermilps {{.*#+}} xmm1 = mem[0,1,0,0]
+; KNL32-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,1,1,1]
; KNL32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,1,3]
; KNL32-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,3]
-; KNL32-NEXT: vmovaps {{.*#+}} ymm1 = <0,2,4,0,u,u,u,u>
-; KNL32-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,1,0,0]
-; KNL32-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,1,1,1]
; KNL32-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2],ymm1[3],ymm0[4],ymm1[5,6,7]
; KNL32-NEXT: retl
%addV = fadd <4 x float> <float 0.0,float 1.0,float 2.0,float 0.0> , <float 0.0,float 1.0,float 2.0,float 0.0>
Modified: llvm/trunk/test/CodeGen/X86/widened-broadcast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/widened-broadcast.ll?rev=304072&r1=304071&r2=304072&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/widened-broadcast.ll (original)
+++ llvm/trunk/test/CodeGen/X86/widened-broadcast.ll Sat May 27 09:07:03 2017
@@ -151,8 +151,7 @@ define <8 x i32> @load_splat_8i32_8i32_0
;
; AVX1-LABEL: load_splat_8i32_8i32_01010101:
; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vmovapd (%rdi), %ymm0
-; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
@@ -288,8 +287,7 @@ define <16 x i16> @load_splat_16i16_16i1
;
; AVX1-LABEL: load_splat_16i16_16i16_0101010101010101:
; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vmovaps (%rdi), %ymm0
-; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = mem[0,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
@@ -315,22 +313,10 @@ define <16 x i16> @load_splat_16i16_16i1
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: retq
;
-; AVX1-LABEL: load_splat_16i16_16i16_0123012301230123:
-; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vbroadcastsd (%rdi), %ymm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: load_splat_16i16_16i16_0123012301230123:
-; AVX2: # BB#0: # %entry
-; AVX2-NEXT: vmovaps (%rdi), %ymm0
-; AVX2-NEXT: vbroadcastsd %xmm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: load_splat_16i16_16i16_0123012301230123:
-; AVX512: # BB#0: # %entry
-; AVX512-NEXT: vmovaps (%rdi), %ymm0
-; AVX512-NEXT: vbroadcastsd %xmm0, %ymm0
-; AVX512-NEXT: retq
+; AVX-LABEL: load_splat_16i16_16i16_0123012301230123:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vbroadcastsd (%rdi), %ymm0
+; AVX-NEXT: retq
entry:
%ld = load <16 x i16>, <16 x i16>* %ptr
%ret = shufflevector <16 x i16> %ld, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3,i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
@@ -513,8 +499,7 @@ define <32 x i8> @load_splat_32i8_32i8_0
;
; AVX1-LABEL: load_splat_32i8_32i8_01010101010101010101010101010101:
; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vmovdqa (%rdi), %ymm0
-; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
+; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = mem[0,0,0,0,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
@@ -587,26 +572,10 @@ define <4 x float> @load_splat_4f32_8f32
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT: retq
;
-; AVX1-LABEL: load_splat_4f32_8f32_0000:
-; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vmovaps (%rdi), %ymm0
-; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
-; AVX1-NEXT: vzeroupper
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: load_splat_4f32_8f32_0000:
-; AVX2: # BB#0: # %entry
-; AVX2-NEXT: vmovaps (%rdi), %ymm0
-; AVX2-NEXT: vbroadcastss %xmm0, %xmm0
-; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: load_splat_4f32_8f32_0000:
-; AVX512: # BB#0: # %entry
-; AVX512-NEXT: vmovaps (%rdi), %ymm0
-; AVX512-NEXT: vbroadcastss %xmm0, %xmm0
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; AVX-LABEL: load_splat_4f32_8f32_0000:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vbroadcastss (%rdi), %xmm0
+; AVX-NEXT: retq
entry:
%ld = load <8 x float>, <8 x float>* %ptr
%ret = shufflevector <8 x float> %ld, <8 x float> undef, <4 x i32> zeroinitializer
@@ -627,22 +596,10 @@ define <8 x float> @load_splat_8f32_16f3
; SSE42-NEXT: movapd %xmm0, %xmm1
; SSE42-NEXT: retq
;
-; AVX1-LABEL: load_splat_8f32_16f32_89898989:
-; AVX1: # BB#0: # %entry
-; AVX1-NEXT: vbroadcastsd 32(%rdi), %ymm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: load_splat_8f32_16f32_89898989:
-; AVX2: # BB#0: # %entry
-; AVX2-NEXT: vbroadcastsd 32(%rdi), %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: load_splat_8f32_16f32_89898989:
-; AVX512: # BB#0: # %entry
-; AVX512-NEXT: vmovapd (%rdi), %zmm0
-; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm0
-; AVX512-NEXT: vbroadcastsd %xmm0, %ymm0
-; AVX512-NEXT: retq
+; AVX-LABEL: load_splat_8f32_16f32_89898989:
+; AVX: # BB#0: # %entry
+; AVX-NEXT: vbroadcastsd 32(%rdi), %ymm0
+; AVX-NEXT: retq
entry:
%ld = load <16 x float>, <16 x float>* %ptr
%ret = shufflevector <16 x float> %ld, <16 x float> undef, <8 x i32> <i32 8, i32 9, i32 8, i32 9, i32 8, i32 9, i32 8, i32 9>
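Several AVX1/AVX2/AVX512 check blocks in this file collapse into a single
AVX block because the targets now agree: once the wide load is narrowed,
the splat becomes a plain vbroadcastss/vbroadcastsd from an offset memory
operand, leaving no 256-bit or 512-bit load to differentiate the prefixes.
For load_splat_8f32_16f32_89898989, elements 8 and 9 of the <16 x float>
are one 8-byte chunk at byte offset 8 * 4 = 32, hence vbroadcastsd 32(%rdi).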
Modified: llvm/trunk/test/CodeGen/X86/x86-interleaved-access.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/x86-interleaved-access.ll?rev=304072&r1=304071&r2=304072&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/x86-interleaved-access.ll (original)
+++ llvm/trunk/test/CodeGen/X86/x86-interleaved-access.ll Sat May 27 09:07:03 2017
@@ -57,10 +57,8 @@ define <4 x double> @load_factorf64_1(<1
; AVX1: # BB#0:
; AVX1-NEXT: vmovups (%rdi), %ymm0
; AVX1-NEXT: vmovups 32(%rdi), %ymm1
-; AVX1-NEXT: vmovups 64(%rdi), %ymm2
-; AVX1-NEXT: vmovups 96(%rdi), %ymm3
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
+; AVX1-NEXT: vinsertf128 $1, 64(%rdi), %ymm0, %ymm0
+; AVX1-NEXT: vinsertf128 $1, 96(%rdi), %ymm1, %ymm1
; AVX1-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; AVX1-NEXT: vmulpd %ymm0, %ymm0, %ymm0
; AVX1-NEXT: retq
@@ -69,10 +67,8 @@ define <4 x double> @load_factorf64_1(<1
; AVX2: # BB#0:
; AVX2-NEXT: vmovupd (%rdi), %ymm0
; AVX2-NEXT: vmovupd 32(%rdi), %ymm1
-; AVX2-NEXT: vmovupd 64(%rdi), %ymm2
-; AVX2-NEXT: vmovupd 96(%rdi), %ymm3
-; AVX2-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX2-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
+; AVX2-NEXT: vinsertf128 $1, 64(%rdi), %ymm0, %ymm0
+; AVX2-NEXT: vinsertf128 $1, 96(%rdi), %ymm1, %ymm1
; AVX2-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; AVX2-NEXT: vmulpd %ymm0, %ymm0, %ymm0
; AVX2-NEXT: retq
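In the interleaved-access cases the narrowed load does not even materialize
as a separate instruction: only the low 16 bytes of each 32-byte load are
used, and the resulting 16-byte load folds straight into vinsertf128's
memory operand, so each vmovups/vinsertf128 pair becomes a single
vinsertf128 with a memory operand.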