[llvm] r285072 - [X86][SSE] Add support for (V)PMOVSX* constant folding
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 25 07:29:26 PDT 2016
Author: rksimon
Date: Tue Oct 25 09:29:25 2016
New Revision: 285072
URL: http://llvm.org/viewvc/llvm-project?rev=285072&view=rev
Log:
[X86][SSE] Add support for (V)PMOVSX* constant folding
We already have (V)PMOVZX* combining support; this is the beginning of handling (V)PMOVSX* similarly - other combines in combineVSZext can be generalized in future patches.
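As an aside, the per-element folding rule is the only difference between the zero-extend and sign-extend cases. A minimal sketch of that rule (not part of this patch; the helper name is hypothetical), using LLVM's APInt API:

#include "llvm/ADT/APInt.h"

// Hypothetical helper illustrating the per-element fold in combineVSZext:
// X86ISD::VZEXT zero-extends each constant element, X86ISD::VSEXT
// sign-extends it. For example, an i8 0xFF element folds to i16 0x00FF
// when zero-extended but to i16 0xFFFF when sign-extended.
static llvm::APInt foldExtendedElt(const llvm::APInt &Elt, unsigned DstBits,
                                   bool IsSigned) {
  return IsSigned ? Elt.sextOrTrunc(DstBits) : Elt.zextOrTrunc(DstBits);
}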
This unearthed an interesting bug: we were generating illegal build vectors on 32-bit targets. It proved difficult to create a test for this from PMOVZX, but it fired immediately with PMOVSX. I've created a more general form of the existing getConstVector to handle these cases - ideally this should be handled in non-target-specific code, but I couldn't find an equivalent.
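For the 32-bit case, here is a minimal sketch (again not part of this patch; the helper name is hypothetical) of how the new getConstVector overload legalizes a 64-bit constant element when MVT::i64 is not a legal type - each value becomes two i32 elements, low half first, and the wider build vector is bitcast back to the original i64 vector type:

#include <cassert>
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallVector.h"

// Hypothetical helper: split one i64 constant into the two i32 elements
// that getConstVector emits when i64 is illegal (e.g. on 32-bit targets).
static void splitI64Constant(const llvm::APInt &V,
                             llvm::SmallVectorImpl<llvm::APInt> &Out) {
  assert(V.getBitWidth() == 64 && "expected a 64-bit constant");
  Out.push_back(V.trunc(32));          // low 32 bits
  Out.push_back(V.lshr(32).trunc(32)); // high 32 bits
}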
Differential Revision: https://reviews.llvm.org/D25874
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/fold-vector-sext-zext.ll
llvm/trunk/test/CodeGen/X86/pmul.ll
llvm/trunk/test/CodeGen/X86/vector-idiv-sdiv-128.ll
llvm/trunk/test/CodeGen/X86/vector-idiv-sdiv-256.ll
llvm/trunk/test/CodeGen/X86/vector-idiv-sdiv-512.ll
llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-128.ll
llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-256.ll
llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-512.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=285072&r1=285071&r2=285072&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Oct 25 09:29:25 2016
@@ -4427,6 +4427,40 @@ static SDValue getConstVector(ArrayRef<i
   return ConstsNode;
 }
+static SDValue getConstVector(ArrayRef<APInt> Values, SmallBitVector &Undefs,
+                              MVT VT, SelectionDAG &DAG, const SDLoc &dl) {
+  assert(Values.size() == Undefs.size() && "Unequal constant and undef arrays");
+  SmallVector<SDValue, 32> Ops;
+  bool Split = false;
+
+  MVT ConstVecVT = VT;
+  unsigned NumElts = VT.getVectorNumElements();
+  bool In64BitMode = DAG.getTargetLoweringInfo().isTypeLegal(MVT::i64);
+  if (!In64BitMode && VT.getVectorElementType() == MVT::i64) {
+    ConstVecVT = MVT::getVectorVT(MVT::i32, NumElts * 2);
+    Split = true;
+  }
+
+  MVT EltVT = ConstVecVT.getVectorElementType();
+  for (unsigned i = 0, e = Values.size(); i != e; ++i) {
+    if (Undefs[i]) {
+      Ops.append(Split ? 2 : 1, DAG.getUNDEF(EltVT));
+      continue;
+    }
+    const APInt &V = Values[i];
+    assert(V.getBitWidth() == VT.getScalarSizeInBits() && "Unexpected sizes");
+    if (Split) {
+      Ops.push_back(DAG.getConstant(V.trunc(32), dl, EltVT));
+      Ops.push_back(DAG.getConstant(V.lshr(32).trunc(32), dl, EltVT));
+    } else {
+      Ops.push_back(DAG.getConstant(V, dl, EltVT));
+    }
+  }
+
+  SDValue ConstsNode = DAG.getBuildVector(ConstVecVT, dl, Ops);
+  return DAG.getBitcast(VT, ConstsNode);
+}
+
/// Returns a vector of specified type with all zero elements.
static SDValue getZeroVector(MVT VT, const X86Subtarget &Subtarget,
SelectionDAG &DAG, const SDLoc &dl) {
@@ -31817,10 +31851,11 @@ static SDValue combineSub(SDNode *N, Sel
return OptimizeConditionalInDecrement(N, DAG);
}
-static SDValue combineVZext(SDNode *N, SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI,
- const X86Subtarget &Subtarget) {
+static SDValue combineVSZext(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget &Subtarget) {
SDLoc DL(N);
+ unsigned Opcode = N->getOpcode();
MVT VT = N->getSimpleValueType(0);
MVT SVT = VT.getVectorElementType();
SDValue Op = N->getOperand(0);
@@ -31829,25 +31864,28 @@ static SDValue combineVZext(SDNode *N, S
unsigned InputBits = OpEltVT.getSizeInBits() * VT.getVectorNumElements();
// Perform any constant folding.
+ // FIXME: Reduce constant pool usage and don't fold when OptSize is enabled.
if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
- SmallVector<SDValue, 4> Vals;
- for (int i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
+ unsigned NumDstElts = VT.getVectorNumElements();
+ SmallBitVector Undefs(NumDstElts, false);
+ SmallVector<APInt, 4> Vals(NumDstElts, APInt(SVT.getSizeInBits(), 0));
+ for (unsigned i = 0; i != NumDstElts; ++i) {
SDValue OpElt = Op.getOperand(i);
if (OpElt.getOpcode() == ISD::UNDEF) {
- Vals.push_back(DAG.getUNDEF(SVT));
+ Undefs[i] = true;
continue;
}
APInt Cst = cast<ConstantSDNode>(OpElt.getNode())->getAPIntValue();
- assert(Cst.getBitWidth() == OpEltVT.getSizeInBits());
- Cst = Cst.zextOrTrunc(SVT.getSizeInBits());
- Vals.push_back(DAG.getConstant(Cst, DL, SVT));
+ Vals[i] = Opcode == X86ISD::VZEXT ? Cst.zextOrTrunc(SVT.getSizeInBits())
+ : Cst.sextOrTrunc(SVT.getSizeInBits());
}
- return DAG.getBuildVector(VT, DL, Vals);
+ return getConstVector(Vals, Undefs, VT, DAG, DL);
}
// (vzext (bitcast (vzext (x)) -> (vzext x)
+ // TODO: (vsext (bitcast (vsext (x)) -> (vsext x)
SDValue V = peekThroughBitcasts(Op);
- if (V != Op && V.getOpcode() == X86ISD::VZEXT) {
+ if (Opcode == X86ISD::VZEXT && V != Op && V.getOpcode() == X86ISD::VZEXT) {
MVT InnerVT = V.getSimpleValueType();
MVT InnerEltVT = InnerVT.getVectorElementType();
@@ -31872,7 +31910,9 @@ static SDValue combineVZext(SDNode *N, S
// Check if we can bypass extracting and re-inserting an element of an input
// vector. Essentially:
// (bitcast (sclr2vec (ext_vec_elt x))) -> (bitcast x)
- if (V.getOpcode() == ISD::SCALAR_TO_VECTOR &&
+ // TODO: Add X86ISD::VSEXT support
+ if (Opcode == X86ISD::VZEXT &&
+ V.getOpcode() == ISD::SCALAR_TO_VECTOR &&
V.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
V.getOperand(0).getSimpleValueType().getSizeInBits() == InputBits) {
SDValue ExtractedV = V.getOperand(0);
@@ -31994,7 +32034,8 @@ SDValue X86TargetLowering::PerformDAGCom
case ISD::SETCC: return combineSetCC(N, DAG, Subtarget);
case X86ISD::SETCC: return combineX86SetCC(N, DAG, DCI, Subtarget);
case X86ISD::BRCOND: return combineBrCond(N, DAG, DCI, Subtarget);
- case X86ISD::VZEXT: return combineVZext(N, DAG, DCI, Subtarget);
+ case X86ISD::VSEXT:
+ case X86ISD::VZEXT: return combineVSZext(N, DAG, DCI, Subtarget);
case X86ISD::SHUFP: // Handle all target specific shuffles
case X86ISD::INSERTPS:
case X86ISD::PALIGNR:
Modified: llvm/trunk/test/CodeGen/X86/fold-vector-sext-zext.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fold-vector-sext-zext.ll?rev=285072&r1=285071&r2=285072&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fold-vector-sext-zext.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fold-vector-sext-zext.ll Tue Oct 25 09:29:25 2016
@@ -83,9 +83,8 @@ define <4 x i32> @test_sext_4i8_4i32_und
define <4 x i64> @test_sext_4i8_4i64() {
; X32-LABEL: test_sext_4i8_4i64:
; X32: # BB#0:
-; X32-NEXT: vpmovsxbq {{\.LCPI.*}}, %xmm0
-; X32-NEXT: vpmovsxbq {{\.LCPI.*}}, %xmm1
-; X32-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X32-NEXT: vmovaps {{.*#+}} xmm0 = [0,0,4294967295,4294967295]
+; X32-NEXT: vinsertf128 $1, {{\.LCPI.*}}, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_sext_4i8_4i64:
@@ -104,9 +103,7 @@ define <4 x i64> @test_sext_4i8_4i64_und
; X32-LABEL: test_sext_4i8_4i64_undef:
; X32: # BB#0:
; X32-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; X32-NEXT: vpmovsxbq %xmm0, %xmm0
-; X32-NEXT: vpmovsxbq {{\.LCPI.*}}, %xmm1
-; X32-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X32-NEXT: vinsertf128 $1, {{\.LCPI.*}}, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_sext_4i8_4i64_undef:
Modified: llvm/trunk/test/CodeGen/X86/pmul.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pmul.ll?rev=285072&r1=285071&r2=285072&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pmul.ll (original)
+++ llvm/trunk/test/CodeGen/X86/pmul.ll Tue Oct 25 09:29:25 2016
@@ -26,7 +26,7 @@ define <16 x i8> @mul_v16i8c(<16 x i8> %
; SSE41-LABEL: mul_v16i8c:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovsxbw %xmm0, %xmm1
-; SSE41-NEXT: pmovsxbw {{.*}}(%rip), %xmm2
+; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [117,117,117,117,117,117,117,117]
; SSE41-NEXT: pmullw %xmm2, %xmm1
; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
; SSE41-NEXT: pand %xmm3, %xmm1
@@ -41,8 +41,7 @@ define <16 x i8> @mul_v16i8c(<16 x i8> %
; AVX2-LABEL: mul_v16i8c:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpmovsxbw %xmm0, %ymm0
-; AVX2-NEXT: vpmovsxbw {{.*}}(%rip), %ymm1
-; AVX2-NEXT: vpmullw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpmullw {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
@@ -54,8 +53,7 @@ define <16 x i8> @mul_v16i8c(<16 x i8> %
; AVX512F-LABEL: mul_v16i8c:
; AVX512F: # BB#0: # %entry
; AVX512F-NEXT: vpmovsxbw %xmm0, %ymm0
-; AVX512F-NEXT: vpmovsxbw {{.*}}(%rip), %ymm1
-; AVX512F-NEXT: vpmullw %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT: vpmullw {{.*}}(%rip), %ymm0, %ymm0
; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: retq
@@ -63,8 +61,7 @@ define <16 x i8> @mul_v16i8c(<16 x i8> %
; AVX512BW-LABEL: mul_v16i8c:
; AVX512BW: # BB#0: # %entry
; AVX512BW-NEXT: vpmovsxbw %xmm0, %ymm0
-; AVX512BW-NEXT: vpmovsxbw {{.*}}(%rip), %ymm1
-; AVX512BW-NEXT: vpmullw %ymm1, %ymm0, %ymm0
+; AVX512BW-NEXT: vpmullw {{.*}}(%rip), %ymm0, %ymm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512BW-NEXT: retq
@@ -418,7 +415,7 @@ define <32 x i8> @mul_v32i8c(<32 x i8> %
; SSE41-LABEL: mul_v32i8c:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovsxbw %xmm0, %xmm2
-; SSE41-NEXT: pmovsxbw {{.*}}(%rip), %xmm4
+; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [117,117,117,117,117,117,117,117]
; SSE41-NEXT: pmullw %xmm4, %xmm2
; SSE41-NEXT: movdqa {{.*#+}} xmm5 = [255,255,255,255,255,255,255,255]
; SSE41-NEXT: pand %xmm5, %xmm2
@@ -443,7 +440,7 @@ define <32 x i8> @mul_v32i8c(<32 x i8> %
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpmovsxbw %xmm1, %ymm1
-; AVX2-NEXT: vpmovsxbw {{.*}}(%rip), %ymm2
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117]
; AVX2-NEXT: vpmullw %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3
; AVX2-NEXT: vmovdqa {{.*#+}} xmm4 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
@@ -462,7 +459,7 @@ define <32 x i8> @mul_v32i8c(<32 x i8> %
; AVX512F-LABEL: mul_v32i8c:
; AVX512F: # BB#0: # %entry
; AVX512F-NEXT: vpmovsxbw %xmm0, %ymm1
-; AVX512F-NEXT: vpmovsxbw {{.*}}(%rip), %ymm2
+; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117]
; AVX512F-NEXT: vpmullw %ymm2, %ymm1, %ymm1
; AVX512F-NEXT: vpmovsxwd %ymm1, %zmm1
; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
@@ -477,8 +474,7 @@ define <32 x i8> @mul_v32i8c(<32 x i8> %
; AVX512BW-LABEL: mul_v32i8c:
; AVX512BW: # BB#0: # %entry
; AVX512BW-NEXT: vpmovsxbw %ymm0, %zmm0
-; AVX512BW-NEXT: vpmovsxbw {{.*}}(%rip), %zmm1
-; AVX512BW-NEXT: vpmullw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpmullw {{.*}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: retq
entry:
@@ -833,7 +829,7 @@ define <64 x i8> @mul_v64i8c(<64 x i8> %
; SSE41-NEXT: movdqa %xmm1, %xmm4
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: pmovsxbw %xmm1, %xmm0
-; SSE41-NEXT: pmovsxbw {{.*}}(%rip), %xmm6
+; SSE41-NEXT: movdqa {{.*#+}} xmm6 = [117,117,117,117,117,117,117,117]
; SSE41-NEXT: pmullw %xmm6, %xmm0
; SSE41-NEXT: movdqa {{.*#+}} xmm7 = [255,255,255,255,255,255,255,255]
; SSE41-NEXT: pand %xmm7, %xmm0
@@ -874,7 +870,7 @@ define <64 x i8> @mul_v64i8c(<64 x i8> %
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT: vpmovsxbw %xmm2, %ymm2
-; AVX2-NEXT: vpmovsxbw {{.*}}(%rip), %ymm3
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117]
; AVX2-NEXT: vpmullw %ymm3, %ymm2, %ymm2
; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm4
; AVX2-NEXT: vmovdqa {{.*#+}} xmm5 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
@@ -907,7 +903,7 @@ define <64 x i8> @mul_v64i8c(<64 x i8> %
; AVX512F-LABEL: mul_v64i8c:
; AVX512F: # BB#0: # %entry
; AVX512F-NEXT: vpmovsxbw %xmm0, %ymm2
-; AVX512F-NEXT: vpmovsxbw {{.*}}(%rip), %ymm3
+; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117]
; AVX512F-NEXT: vpmullw %ymm3, %ymm2, %ymm2
; AVX512F-NEXT: vpmovsxwd %ymm2, %zmm2
; AVX512F-NEXT: vpmovdb %zmm2, %xmm2
@@ -932,7 +928,7 @@ define <64 x i8> @mul_v64i8c(<64 x i8> %
; AVX512BW-LABEL: mul_v64i8c:
; AVX512BW: # BB#0: # %entry
; AVX512BW-NEXT: vpmovsxbw %ymm0, %zmm1
-; AVX512BW-NEXT: vpmovsxbw {{.*}}(%rip), %zmm2
+; AVX512BW-NEXT: vmovdqu16 {{.*#+}} zmm2 = [117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117]
; AVX512BW-NEXT: vpmullw %zmm2, %zmm1, %zmm1
; AVX512BW-NEXT: vpmovwb %zmm1, %ymm1
; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm0
Modified: llvm/trunk/test/CodeGen/X86/vector-idiv-sdiv-128.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-idiv-sdiv-128.ll?rev=285072&r1=285071&r2=285072&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-idiv-sdiv-128.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-idiv-sdiv-128.ll Tue Oct 25 09:29:25 2016
@@ -544,7 +544,7 @@ define <16 x i8> @test_rem7_16i8(<16 x i
; SSE41-NEXT: pand {{.*}}(%rip), %xmm1
; SSE41-NEXT: paddb %xmm2, %xmm1
; SSE41-NEXT: pmovsxbw %xmm1, %xmm2
-; SSE41-NEXT: pmovsxbw {{.*}}(%rip), %xmm3
+; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7]
; SSE41-NEXT: pmullw %xmm3, %xmm2
; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255]
; SSE41-NEXT: pand %xmm4, %xmm2
@@ -577,7 +577,7 @@ define <16 x i8> @test_rem7_16i8(<16 x i
; AVX1-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpaddb %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vpmovsxbw %xmm1, %xmm2
-; AVX1-NEXT: vpmovsxbw {{.*}}(%rip), %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7]
; AVX1-NEXT: vpmullw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255]
; AVX1-NEXT: vpand %xmm4, %xmm2, %xmm2
@@ -607,8 +607,7 @@ define <16 x i8> @test_rem7_16i8(<16 x i
; AVX2-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
; AVX2-NEXT: vpaddb %xmm1, %xmm2, %xmm1
; AVX2-NEXT: vpmovsxbw %xmm1, %ymm1
-; AVX2-NEXT: vpmovsxbw {{.*}}(%rip), %ymm2
-; AVX2-NEXT: vpmullw %ymm2, %ymm1, %ymm1
+; AVX2-NEXT: vpmullw {{.*}}(%rip), %ymm1, %ymm1
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX2-NEXT: vpshufb %xmm3, %xmm2, %xmm2
Modified: llvm/trunk/test/CodeGen/X86/vector-idiv-sdiv-256.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-idiv-sdiv-256.ll?rev=285072&r1=285071&r2=285072&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-idiv-sdiv-256.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-idiv-sdiv-256.ll Tue Oct 25 09:29:25 2016
@@ -459,7 +459,7 @@ define <32 x i8> @test_rem7_32i8(<32 x i
; AVX1-NEXT: vpsubb %xmm7, %xmm3, %xmm3
; AVX1-NEXT: vpaddb %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vpmovsxbw %xmm3, %xmm4
-; AVX1-NEXT: vpmovsxbw {{.*}}(%rip), %xmm5
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [7,7,7,7,7,7,7,7]
; AVX1-NEXT: vpmullw %xmm5, %xmm4, %xmm4
; AVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [255,255,255,255,255,255,255,255]
; AVX1-NEXT: vpand %xmm6, %xmm4, %xmm4
@@ -524,7 +524,7 @@ define <32 x i8> @test_rem7_32i8(<32 x i
; AVX2-NEXT: vpaddb %ymm1, %ymm2, %ymm1
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT: vpmovsxbw %xmm2, %ymm2
-; AVX2-NEXT: vpmovsxbw {{.*}}(%rip), %ymm3
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
; AVX2-NEXT: vpmullw %ymm3, %ymm2, %ymm2
; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm4
; AVX2-NEXT: vmovdqa {{.*#+}} xmm5 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
Modified: llvm/trunk/test/CodeGen/X86/vector-idiv-sdiv-512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-idiv-sdiv-512.ll?rev=285072&r1=285071&r2=285072&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-idiv-sdiv-512.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-idiv-sdiv-512.ll Tue Oct 25 09:29:25 2016
@@ -1439,7 +1439,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
; AVX512F-NEXT: vpsubb %ymm7, %ymm4, %ymm4
; AVX512F-NEXT: vpaddb %ymm8, %ymm4, %ymm8
; AVX512F-NEXT: vpmovsxbw %xmm8, %ymm9
-; AVX512F-NEXT: vpmovsxbw {{.*}}(%rip), %ymm4
+; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
; AVX512F-NEXT: vpmullw %ymm4, %ymm9, %ymm9
; AVX512F-NEXT: vpmovsxwd %ymm9, %zmm9
; AVX512F-NEXT: vpmovdb %zmm9, %xmm9
Modified: llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-128.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-128.ll?rev=285072&r1=285071&r2=285072&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-128.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-128.ll Tue Oct 25 09:29:25 2016
@@ -520,7 +520,7 @@ define <16 x i8> @test_rem7_16i8(<16 x i
; SSE41-NEXT: psrlw $2, %xmm2
; SSE41-NEXT: pand {{.*}}(%rip), %xmm2
; SSE41-NEXT: pmovsxbw %xmm2, %xmm1
-; SSE41-NEXT: pmovsxbw {{.*}}(%rip), %xmm3
+; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7]
; SSE41-NEXT: pmullw %xmm3, %xmm1
; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255]
; SSE41-NEXT: pand %xmm4, %xmm1
@@ -550,7 +550,7 @@ define <16 x i8> @test_rem7_16i8(<16 x i
; AVX1-NEXT: vpsrlw $2, %xmm1, %xmm1
; AVX1-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpmovsxbw %xmm1, %xmm2
-; AVX1-NEXT: vpmovsxbw {{.*}}(%rip), %xmm3
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7]
; AVX1-NEXT: vpmullw %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255]
; AVX1-NEXT: vpand %xmm4, %xmm2, %xmm2
@@ -577,8 +577,7 @@ define <16 x i8> @test_rem7_16i8(<16 x i
; AVX2-NEXT: vpsrlw $2, %xmm1, %xmm1
; AVX2-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
; AVX2-NEXT: vpmovsxbw %xmm1, %ymm1
-; AVX2-NEXT: vpmovsxbw {{.*}}(%rip), %ymm2
-; AVX2-NEXT: vpmullw %ymm2, %ymm1, %ymm1
+; AVX2-NEXT: vpmullw {{.*}}(%rip), %ymm1, %ymm1
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; AVX2-NEXT: vpshufb %xmm3, %xmm2, %xmm2
Modified: llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-256.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-256.ll?rev=285072&r1=285071&r2=285072&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-256.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-256.ll Tue Oct 25 09:29:25 2016
@@ -470,7 +470,7 @@ define <32 x i8> @test_rem7_32i8(<32 x i
; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
; AVX1-NEXT: vpand %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vpmovsxbw %xmm3, %xmm6
-; AVX1-NEXT: vpmovsxbw {{.*}}(%rip), %xmm7
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm7 = [7,7,7,7,7,7,7,7]
; AVX1-NEXT: vpmullw %xmm7, %xmm6, %xmm6
; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [255,255,255,255,255,255,255,255]
; AVX1-NEXT: vpand %xmm5, %xmm6, %xmm6
@@ -530,7 +530,7 @@ define <32 x i8> @test_rem7_32i8(<32 x i
; AVX2-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX2-NEXT: vpmovsxbw %xmm2, %ymm2
-; AVX2-NEXT: vpmovsxbw {{.*}}(%rip), %ymm3
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
; AVX2-NEXT: vpmullw %ymm3, %ymm2, %ymm2
; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm4
; AVX2-NEXT: vmovdqa {{.*#+}} xmm5 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
Modified: llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-512.ll?rev=285072&r1=285071&r2=285072&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-512.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-idiv-udiv-512.ll Tue Oct 25 09:29:25 2016
@@ -1277,7 +1277,7 @@ define <64 x i8> @test_rem7_64i8(<64 x i
; AVX512F-NEXT: vmovdqa {{.*#+}} ymm6 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
; AVX512F-NEXT: vpand %ymm6, %ymm3, %ymm7
; AVX512F-NEXT: vpmovsxbw %xmm7, %ymm8
-; AVX512F-NEXT: vpmovsxbw {{.*}}(%rip), %ymm3
+; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
; AVX512F-NEXT: vpmullw %ymm3, %ymm8, %ymm8
; AVX512F-NEXT: vpmovsxwd %ymm8, %zmm8
; AVX512F-NEXT: vpmovdb %zmm8, %xmm8