[llvm] r354340 - [X86][AVX] EltsFromConsecutiveLoads - Add BROADCAST lowering support
Eric Christopher via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 26 17:02:06 PST 2019
Hi Simon,
I've come across a crash after this commit (while compiling Mesa) and have
attached a testcase here. It looks like about 4 patches would need to be
reverted to get this back to green - I'd like to revert tomorrow if you
can't figure out what's up by then.
I do apologize if this is inconvenient; let me know if there's
anything else I can do to help.
Thanks!
-eric
On Tue, Feb 19, 2019 at 7:56 AM Simon Pilgrim via llvm-commits
<llvm-commits at lists.llvm.org> wrote:
>
> Author: rksimon
> Date: Tue Feb 19 07:57:09 2019
> New Revision: 354340
>
> URL: http://llvm.org/viewvc/llvm-project?rev=354340&view=rev
> Log:
> [X86][AVX] EltsFromConsecutiveLoads - Add BROADCAST lowering support
>
> This patch adds scalar/subvector BROADCAST handling to EltsFromConsecutiveLoads.
>
> It mainly shows codegen changes to 32-bit code which failed to handle i64 loads, although 64-bit code is also using this new path to more efficiently combine to a broadcast load.
>
> Differential Revision: https://reviews.llvm.org/D58053
>
> Modified:
> llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
> llvm/trunk/test/CodeGen/X86/avx-vbroadcast.ll
> llvm/trunk/test/CodeGen/X86/avx2-vbroadcast.ll
> llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
> llvm/trunk/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll
> llvm/trunk/test/CodeGen/X86/avx512ifma-intrinsics-upgrade.ll
> llvm/trunk/test/CodeGen/X86/avx512ifma-intrinsics.ll
> llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
> llvm/trunk/test/CodeGen/X86/i64-mem-copy.ll
> llvm/trunk/test/CodeGen/X86/insertelement-shuffle.ll
> llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-xop.ll
>
> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=354340&r1=354339&r2=354340&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Feb 19 07:57:09 2019
> @@ -7384,12 +7384,15 @@ static SDValue EltsFromConsecutiveLoads(
> VT.is256BitVector() && !Subtarget.hasInt256())
> return SDValue();
>
> + if (NumElems == 1)
> + return DAG.getBitcast(VT, Elts[FirstLoadedElt]);
> +
> if (IsConsecutiveLoad)
> return CreateLoad(VT, LDBase);
>
> // IsConsecutiveLoadWithZeros - we need to create a shuffle of the loaded
> // vector and a zero vector to clear out the zero elements.
> - if (!isAfterLegalize && NumElems == VT.getVectorNumElements()) {
> + if (!isAfterLegalize && VT.isVector() && NumElems == VT.getVectorNumElements()) {
> SmallVector<int, 4> ClearMask(NumElems, -1);
> for (unsigned i = 0; i < NumElems; ++i) {
> if (ZeroMask[i])
> @@ -7404,8 +7407,23 @@ static SDValue EltsFromConsecutiveLoads(
> }
> }
>
> - int LoadSize =
> - (1 + LastLoadedElt - FirstLoadedElt) * LDBaseVT.getStoreSizeInBits();
> + unsigned BaseSize = LDBaseVT.getStoreSizeInBits();
> + int LoadSize = (1 + LastLoadedElt - FirstLoadedElt) * BaseSize;
> +
> + // If the upper half of a ymm/zmm load is undef then just load the lower half.
> + if (VT.is256BitVector() || VT.is512BitVector()) {
> + unsigned HalfNumElems = NumElems / 2;
> + if (UndefMask.extractBits(HalfNumElems, HalfNumElems).isAllOnesValue()) {
> + EVT HalfVT =
> + EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(), HalfNumElems);
> + SDValue HalfLD =
> + EltsFromConsecutiveLoads(HalfVT, Elts.drop_back(HalfNumElems), DL,
> + DAG, Subtarget, isAfterLegalize);
> + if (HalfLD)
> + return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT),
> + HalfLD, DAG.getIntPtrConstant(0, DL));
> + }
> + }
>
> // VZEXT_LOAD - consecutive 32/64-bit load/undefs followed by zeros/undefs.
> if (IsConsecutiveLoad && FirstLoadedElt == 0 &&
> @@ -7428,6 +7446,55 @@ static SDValue EltsFromConsecutiveLoads(
> }
> }
>
> + // BROADCAST - match the smallest possible repetition pattern, load that
> + // scalar/subvector element and then broadcast to the entire vector.
> + if (ZeroMask.isNullValue() && isPowerOf2_32(NumElems) &&
> + (BaseSize % 8) == 0 && Subtarget.hasAVX() &&
> + (VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector())) {
> + for (unsigned SubElems = 1; SubElems < NumElems; SubElems *= 2) {
> + unsigned RepeatSize = SubElems * BaseSize;
> + unsigned ScalarSize = std::min(RepeatSize, 64u);
> + if (!Subtarget.hasAVX2() && ScalarSize < 32)
> + continue;
> +
> + bool Match = true;
> + SmallVector<SDValue, 8> RepeatedLoads(SubElems, DAG.getUNDEF(LDBaseVT));
> + for (unsigned i = 0; i != NumElems && Match; ++i) {
> + if (!LoadMask[i])
> + continue;
> + SDValue Elt = peekThroughBitcasts(Elts[i]);
> + if (RepeatedLoads[i % SubElems].isUndef())
> + RepeatedLoads[i % SubElems] = Elt;
> + else
> + Match &= (RepeatedLoads[i % SubElems] == Elt);
> + }
> +
> + // We must have loads at both ends of the repetition.
> + Match &= !RepeatedLoads.front().isUndef();
> + Match &= !RepeatedLoads.back().isUndef();
> + if (!Match)
> + continue;
> +
> + EVT RepeatVT =
> + VT.isInteger() && (RepeatSize != 64 || TLI.isTypeLegal(MVT::i64))
> + ? EVT::getIntegerVT(*DAG.getContext(), ScalarSize)
> + : EVT::getFloatingPointVT(ScalarSize);
> + if (RepeatSize > ScalarSize)
> + RepeatVT = EVT::getVectorVT(*DAG.getContext(), RepeatVT,
> + RepeatSize / ScalarSize);
> + if (SDValue RepeatLoad = EltsFromConsecutiveLoads(
> + RepeatVT, RepeatedLoads, DL, DAG, Subtarget, isAfterLegalize)) {
> + EVT BroadcastVT =
> + EVT::getVectorVT(*DAG.getContext(), RepeatVT.getScalarType(),
> + VT.getSizeInBits() / ScalarSize);
> + unsigned Opcode = RepeatSize > ScalarSize ? X86ISD::SUBV_BROADCAST
> + : X86ISD::VBROADCAST;
> + SDValue Broadcast = DAG.getNode(Opcode, DL, BroadcastVT, RepeatLoad);
> + return DAG.getBitcast(VT, Broadcast);
> + }
> + }
> + }
> +
> return SDValue();
> }
>
>
> Modified: llvm/trunk/test/CodeGen/X86/avx-vbroadcast.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-vbroadcast.ll?rev=354340&r1=354339&r2=354340&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/avx-vbroadcast.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/avx-vbroadcast.ll Tue Feb 19 07:57:09 2019
> @@ -6,9 +6,7 @@ define <4 x i64> @A(i64* %ptr) nounwind
> ; X32-LABEL: A:
> ; X32: ## %bb.0: ## %entry
> ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
> -; X32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
> -; X32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,0,1]
> -; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
> +; X32-NEXT: vbroadcastsd (%eax), %ymm0
> ; X32-NEXT: retl
> ;
> ; X64-LABEL: A:
> @@ -34,11 +32,9 @@ define <4 x i64> @A2(i64* %ptr, i64* %pt
> ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
> ; X32-NEXT: movl (%ecx), %edx
> ; X32-NEXT: movl 4(%ecx), %esi
> -; X32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
> +; X32-NEXT: vbroadcastsd (%ecx), %ymm0
> ; X32-NEXT: movl %edx, (%eax)
> ; X32-NEXT: movl %esi, 4(%eax)
> -; X32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,0,1]
> -; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
> ; X32-NEXT: popl %esi
> ; X32-NEXT: retl
> ;
> @@ -590,8 +586,7 @@ define <2 x i64> @G(i64* %ptr) nounwind
> ; X32-LABEL: G:
> ; X32: ## %bb.0: ## %entry
> ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
> -; X32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
> -; X32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,0,1]
> +; X32-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
> ; X32-NEXT: retl
> ;
> ; X64-LABEL: G:
> @@ -615,10 +610,9 @@ define <2 x i64> @G2(i64* %ptr, i64* %pt
> ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
> ; X32-NEXT: movl (%ecx), %edx
> ; X32-NEXT: movl 4(%ecx), %esi
> -; X32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
> +; X32-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
> ; X32-NEXT: movl %edx, (%eax)
> ; X32-NEXT: movl %esi, 4(%eax)
> -; X32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,0,1]
> ; X32-NEXT: popl %esi
> ; X32-NEXT: retl
> ;
>
> Modified: llvm/trunk/test/CodeGen/X86/avx2-vbroadcast.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx2-vbroadcast.ll?rev=354340&r1=354339&r2=354340&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/avx2-vbroadcast.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/avx2-vbroadcast.ll Tue Feb 19 07:57:09 2019
> @@ -207,8 +207,7 @@ define <4 x i64> @QQ64(i64* %ptr) nounwi
> ; X32-LABEL: QQ64:
> ; X32: ## %bb.0: ## %entry
> ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
> -; X32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
> -; X32-NEXT: vbroadcastsd %xmm0, %ymm0
> +; X32-NEXT: vbroadcastsd (%eax), %ymm0
> ; X32-NEXT: retl
> ;
> ; X64-LABEL: QQ64:
> @@ -1368,8 +1367,7 @@ define void @isel_crash_4q(i64* %cV_R.ad
> ; X32-NEXT: movl 8(%ebp), %eax
> ; X32-NEXT: vxorps %xmm0, %xmm0, %xmm0
> ; X32-NEXT: vmovaps %ymm0, (%esp)
> -; X32-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
> -; X32-NEXT: vbroadcastsd %xmm1, %ymm1
> +; X32-NEXT: vbroadcastsd (%eax), %ymm1
> ; X32-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp)
> ; X32-NEXT: vmovaps %ymm1, {{[0-9]+}}(%esp)
> ; X32-NEXT: movl %ebp, %esp
>
> Modified: llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll?rev=354340&r1=354339&r2=354340&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll Tue Feb 19 07:57:09 2019
> @@ -60,15 +60,13 @@ declare <16 x i32> @llvm.x86.avx512.mask
> define <8 x i64>@test_int_x86_avx512_mask_pbroadcastq_gpr_512(i64 %x0, <8 x i64> %x1, i8 %mask) {
> ; X86-LABEL: test_int_x86_avx512_mask_pbroadcastq_gpr_512:
> ; X86: ## %bb.0:
> -; X86-NEXT: vmovq {{[0-9]+}}(%esp), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x4c,0x24,0x04]
> -; X86-NEXT: ## xmm1 = mem[0],zero
> -; X86-NEXT: vpbroadcastq %xmm1, %zmm2 ## encoding: [0x62,0xf2,0xfd,0x48,0x59,0xd1]
> +; X86-NEXT: vpbroadcastq {{[0-9]+}}(%esp), %zmm1 ## encoding: [0x62,0xf2,0xfd,0x48,0x59,0x8c,0x24,0x04,0x00,0x00,0x00]
> ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x0c]
> ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
> -; X86-NEXT: vpbroadcastq %xmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x59,0xc1]
> -; X86-NEXT: vpbroadcastq %xmm1, %zmm1 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x59,0xc9]
> -; X86-NEXT: vpaddq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc1]
> -; X86-NEXT: vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
> +; X86-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x6f,0xc1]
> +; X86-NEXT: vmovdqa64 %zmm1, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x6f,0xd1]
> +; X86-NEXT: vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
> +; X86-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
> ; X86-NEXT: retl ## encoding: [0xc3]
> ;
> ; X64-LABEL: test_int_x86_avx512_mask_pbroadcastq_gpr_512:
> @@ -2253,9 +2251,7 @@ define <8 x i64> @test_mask_add_epi64_rm
> ; X86-LABEL: test_mask_add_epi64_rmb:
> ; X86: ## %bb.0:
> ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
> -; X86-NEXT: vmovq (%eax), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x08]
> -; X86-NEXT: ## xmm1 = mem[0],zero
> -; X86-NEXT: vpbroadcastq %xmm1, %zmm1 ## encoding: [0x62,0xf2,0xfd,0x48,0x59,0xc9]
> +; X86-NEXT: vpbroadcastq (%eax), %zmm1 ## encoding: [0x62,0xf2,0xfd,0x48,0x59,0x08]
> ; X86-NEXT: vpaddq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc1]
> ; X86-NEXT: retl ## encoding: [0xc3]
> ;
> @@ -2274,9 +2270,7 @@ define <8 x i64> @test_mask_add_epi64_rm
> ; X86-LABEL: test_mask_add_epi64_rmbk:
> ; X86: ## %bb.0:
> ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
> -; X86-NEXT: vmovq (%eax), %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x10]
> -; X86-NEXT: ## xmm2 = mem[0],zero
> -; X86-NEXT: vpbroadcastq %xmm2, %zmm2 ## encoding: [0x62,0xf2,0xfd,0x48,0x59,0xd2]
> +; X86-NEXT: vpbroadcastq (%eax), %zmm2 ## encoding: [0x62,0xf2,0xfd,0x48,0x59,0x10]
> ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
> ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
> ; X86-NEXT: vpaddq %zmm2, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xd4,0xca]
> @@ -2300,9 +2294,7 @@ define <8 x i64> @test_mask_add_epi64_rm
> ; X86-LABEL: test_mask_add_epi64_rmbkz:
> ; X86: ## %bb.0:
> ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
> -; X86-NEXT: vmovq (%eax), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x08]
> -; X86-NEXT: ## xmm1 = mem[0],zero
> -; X86-NEXT: vpbroadcastq %xmm1, %zmm1 ## encoding: [0x62,0xf2,0xfd,0x48,0x59,0xc9]
> +; X86-NEXT: vpbroadcastq (%eax), %zmm1 ## encoding: [0x62,0xf2,0xfd,0x48,0x59,0x08]
> ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
> ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
> ; X86-NEXT: vpaddq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xd4,0xc1]
> @@ -2427,9 +2419,7 @@ define <8 x i64> @test_mask_sub_epi64_rm
> ; X86-LABEL: test_mask_sub_epi64_rmb:
> ; X86: ## %bb.0:
> ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
> -; X86-NEXT: vmovq (%eax), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x08]
> -; X86-NEXT: ## xmm1 = mem[0],zero
> -; X86-NEXT: vpbroadcastq %xmm1, %zmm1 ## encoding: [0x62,0xf2,0xfd,0x48,0x59,0xc9]
> +; X86-NEXT: vpbroadcastq (%eax), %zmm1 ## encoding: [0x62,0xf2,0xfd,0x48,0x59,0x08]
> ; X86-NEXT: vpsubq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xfb,0xc1]
> ; X86-NEXT: retl ## encoding: [0xc3]
> ;
> @@ -2448,9 +2438,7 @@ define <8 x i64> @test_mask_sub_epi64_rm
> ; X86-LABEL: test_mask_sub_epi64_rmbk:
> ; X86: ## %bb.0:
> ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
> -; X86-NEXT: vmovq (%eax), %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x10]
> -; X86-NEXT: ## xmm2 = mem[0],zero
> -; X86-NEXT: vpbroadcastq %xmm2, %zmm2 ## encoding: [0x62,0xf2,0xfd,0x48,0x59,0xd2]
> +; X86-NEXT: vpbroadcastq (%eax), %zmm2 ## encoding: [0x62,0xf2,0xfd,0x48,0x59,0x10]
> ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
> ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
> ; X86-NEXT: vpsubq %zmm2, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xfb,0xca]
> @@ -2474,9 +2462,7 @@ define <8 x i64> @test_mask_sub_epi64_rm
> ; X86-LABEL: test_mask_sub_epi64_rmbkz:
> ; X86: ## %bb.0:
> ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
> -; X86-NEXT: vmovq (%eax), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x08]
> -; X86-NEXT: ## xmm1 = mem[0],zero
> -; X86-NEXT: vpbroadcastq %xmm1, %zmm1 ## encoding: [0x62,0xf2,0xfd,0x48,0x59,0xc9]
> +; X86-NEXT: vpbroadcastq (%eax), %zmm1 ## encoding: [0x62,0xf2,0xfd,0x48,0x59,0x08]
> ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
> ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
> ; X86-NEXT: vpsubq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xfb,0xc1]
>
> Modified: llvm/trunk/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll?rev=354340&r1=354339&r2=354340&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll Tue Feb 19 07:57:09 2019
> @@ -2011,9 +2011,7 @@ define <8 x i64> @test_mask_mullo_epi64_
> ; X86-LABEL: test_mask_mullo_epi64_rmb_512:
> ; X86: # %bb.0:
> ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
> -; X86-NEXT: vmovq (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x08]
> -; X86-NEXT: # xmm1 = mem[0],zero
> -; X86-NEXT: vpbroadcastq %xmm1, %zmm1 # encoding: [0x62,0xf2,0xfd,0x48,0x59,0xc9]
> +; X86-NEXT: vpbroadcastq (%eax), %zmm1 # encoding: [0x62,0xf2,0xfd,0x48,0x59,0x08]
> ; X86-NEXT: vpmullq %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x40,0xc1]
> ; X86-NEXT: retl # encoding: [0xc3]
> ;
> @@ -2032,9 +2030,7 @@ define <8 x i64> @test_mask_mullo_epi64_
> ; X86-LABEL: test_mask_mullo_epi64_rmbk_512:
> ; X86: # %bb.0:
> ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
> -; X86-NEXT: vmovq (%eax), %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x10]
> -; X86-NEXT: # xmm2 = mem[0],zero
> -; X86-NEXT: vpbroadcastq %xmm2, %zmm2 # encoding: [0x62,0xf2,0xfd,0x48,0x59,0xd2]
> +; X86-NEXT: vpbroadcastq (%eax), %zmm2 # encoding: [0x62,0xf2,0xfd,0x48,0x59,0x10]
> ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
> ; X86-NEXT: vpmullq %zmm2, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x40,0xca]
> ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
> @@ -2057,9 +2053,7 @@ define <8 x i64> @test_mask_mullo_epi64_
> ; X86-LABEL: test_mask_mullo_epi64_rmbkz_512:
> ; X86: # %bb.0:
> ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
> -; X86-NEXT: vmovq (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x08]
> -; X86-NEXT: # xmm1 = mem[0],zero
> -; X86-NEXT: vpbroadcastq %xmm1, %zmm1 # encoding: [0x62,0xf2,0xfd,0x48,0x59,0xc9]
> +; X86-NEXT: vpbroadcastq (%eax), %zmm1 # encoding: [0x62,0xf2,0xfd,0x48,0x59,0x08]
> ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
> ; X86-NEXT: vpmullq %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x40,0xc1]
> ; X86-NEXT: retl # encoding: [0xc3]
> @@ -2178,9 +2172,7 @@ define <4 x i64> @test_mask_mullo_epi64_
> ; X86-LABEL: test_mask_mullo_epi64_rmb_256:
> ; X86: # %bb.0:
> ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
> -; X86-NEXT: vmovq (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x08]
> -; X86-NEXT: # xmm1 = mem[0],zero
> -; X86-NEXT: vpbroadcastq %xmm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x59,0xc9]
> +; X86-NEXT: vpbroadcastq (%eax), %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x59,0x08]
> ; X86-NEXT: vpmullq %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x40,0xc1]
> ; X86-NEXT: retl # encoding: [0xc3]
> ;
> @@ -2199,9 +2191,7 @@ define <4 x i64> @test_mask_mullo_epi64_
> ; X86-LABEL: test_mask_mullo_epi64_rmbk_256:
> ; X86: # %bb.0:
> ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
> -; X86-NEXT: vmovq (%eax), %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x10]
> -; X86-NEXT: # xmm2 = mem[0],zero
> -; X86-NEXT: vpbroadcastq %xmm2, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x59,0xd2]
> +; X86-NEXT: vpbroadcastq (%eax), %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x59,0x10]
> ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
> ; X86-NEXT: vpmullq %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x40,0xca]
> ; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
> @@ -2224,9 +2214,7 @@ define <4 x i64> @test_mask_mullo_epi64_
> ; X86-LABEL: test_mask_mullo_epi64_rmbkz_256:
> ; X86: # %bb.0:
> ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
> -; X86-NEXT: vmovq (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x08]
> -; X86-NEXT: # xmm1 = mem[0],zero
> -; X86-NEXT: vpbroadcastq %xmm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x59,0xc9]
> +; X86-NEXT: vpbroadcastq (%eax), %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x59,0x08]
> ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
> ; X86-NEXT: vpmullq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x40,0xc1]
> ; X86-NEXT: retl # encoding: [0xc3]
> @@ -2718,10 +2706,9 @@ define <8 x i32>@test_int_x86_avx512_mas
> ; X86: # %bb.0:
> ; X86-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
> ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
> -; X86-NEXT: vmovq (%eax), %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x10]
> -; X86-NEXT: # xmm2 = mem[0],zero
> +; X86-NEXT: vpbroadcastq (%eax), %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x59,0x10]
> ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
> -; X86-NEXT: vinserti32x4 $1, %xmm2, %ymm2, %ymm1 {%k1} # encoding: [0x62,0xf3,0x6d,0x29,0x38,0xca,0x01]
> +; X86-NEXT: vmovdqa32 %ymm2, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6f,0xca]
> ; X86-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x38,0xd0,0x01]
> ; X86-NEXT: vinserti32x4 $1, %xmm0, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x38,0xc0,0x01]
> ; X86-NEXT: vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
>
> Modified: llvm/trunk/test/CodeGen/X86/avx512ifma-intrinsics-upgrade.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512ifma-intrinsics-upgrade.ll?rev=354340&r1=354339&r2=354340&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/avx512ifma-intrinsics-upgrade.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/avx512ifma-intrinsics-upgrade.ll Tue Feb 19 07:57:09 2019
> @@ -199,9 +199,7 @@ define <8 x i64>@test_int_x86_avx512_vpm
> ; X86-LABEL: test_int_x86_avx512_vpmadd52h_uq_512_load_bcast:
> ; X86: # %bb.0:
> ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
> -; X86-NEXT: vmovq (%eax), %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x10]
> -; X86-NEXT: # xmm2 = mem[0],zero
> -; X86-NEXT: vpbroadcastq %xmm2, %zmm2 # encoding: [0x62,0xf2,0xfd,0x48,0x59,0xd2]
> +; X86-NEXT: vpbroadcastq (%eax), %zmm2 # encoding: [0x62,0xf2,0xfd,0x48,0x59,0x10]
> ; X86-NEXT: vpmadd52huq %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0xb5,0xc2]
> ; X86-NEXT: retl # encoding: [0xc3]
> ;
> @@ -238,9 +236,7 @@ define <8 x i64>@test_int_x86_avx512_vpm
> ; X86-LABEL: test_int_x86_avx512_vpmadd52h_uq_512_load_commute_bcast:
> ; X86: # %bb.0:
> ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
> -; X86-NEXT: vmovq (%eax), %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x10]
> -; X86-NEXT: # xmm2 = mem[0],zero
> -; X86-NEXT: vpbroadcastq %xmm2, %zmm2 # encoding: [0x62,0xf2,0xfd,0x48,0x59,0xd2]
> +; X86-NEXT: vpbroadcastq (%eax), %zmm2 # encoding: [0x62,0xf2,0xfd,0x48,0x59,0x10]
> ; X86-NEXT: vpmadd52huq %zmm1, %zmm2, %zmm0 # encoding: [0x62,0xf2,0xed,0x48,0xb5,0xc1]
> ; X86-NEXT: retl # encoding: [0xc3]
> ;
> @@ -280,9 +276,7 @@ define <8 x i64>@test_int_x86_avx512_mas
> ; X86-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_512_load_bcast:
> ; X86: # %bb.0:
> ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
> -; X86-NEXT: vmovq (%eax), %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x10]
> -; X86-NEXT: # xmm2 = mem[0],zero
> -; X86-NEXT: vpbroadcastq %xmm2, %zmm2 # encoding: [0x62,0xf2,0xfd,0x48,0x59,0xd2]
> +; X86-NEXT: vpbroadcastq (%eax), %zmm2 # encoding: [0x62,0xf2,0xfd,0x48,0x59,0x10]
> ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
> ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
> ; X86-NEXT: vpmadd52huq %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0xb5,0xc2]
> @@ -325,9 +319,7 @@ define <8 x i64>@test_int_x86_avx512_mas
> ; X86-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_512_load_commute_bcast:
> ; X86: # %bb.0:
> ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
> -; X86-NEXT: vmovq (%eax), %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x10]
> -; X86-NEXT: # xmm2 = mem[0],zero
> -; X86-NEXT: vpbroadcastq %xmm2, %zmm2 # encoding: [0x62,0xf2,0xfd,0x48,0x59,0xd2]
> +; X86-NEXT: vpbroadcastq (%eax), %zmm2 # encoding: [0x62,0xf2,0xfd,0x48,0x59,0x10]
> ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
> ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
> ; X86-NEXT: vpmadd52huq %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x49,0xb5,0xc1]
> @@ -370,9 +362,7 @@ define <8 x i64>@test_int_x86_avx512_mas
> ; X86-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_512_load_bcast:
> ; X86: # %bb.0:
> ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
> -; X86-NEXT: vmovq (%eax), %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x10]
> -; X86-NEXT: # xmm2 = mem[0],zero
> -; X86-NEXT: vpbroadcastq %xmm2, %zmm2 # encoding: [0x62,0xf2,0xfd,0x48,0x59,0xd2]
> +; X86-NEXT: vpbroadcastq (%eax), %zmm2 # encoding: [0x62,0xf2,0xfd,0x48,0x59,0x10]
> ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
> ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
> ; X86-NEXT: vpmadd52huq %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xb5,0xc2]
> @@ -415,9 +405,7 @@ define <8 x i64>@test_int_x86_avx512_mas
> ; X86-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_512_load_commute_bcast:
> ; X86: # %bb.0:
> ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
> -; X86-NEXT: vmovq (%eax), %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x10]
> -; X86-NEXT: # xmm2 = mem[0],zero
> -; X86-NEXT: vpbroadcastq %xmm2, %zmm2 # encoding: [0x62,0xf2,0xfd,0x48,0x59,0xd2]
> +; X86-NEXT: vpbroadcastq (%eax), %zmm2 # encoding: [0x62,0xf2,0xfd,0x48,0x59,0x10]
> ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
> ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
> ; X86-NEXT: vpmadd52huq %zmm1, %zmm2, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xed,0xc9,0xb5,0xc1]
>
> Modified: llvm/trunk/test/CodeGen/X86/avx512ifma-intrinsics.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512ifma-intrinsics.ll?rev=354340&r1=354339&r2=354340&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/avx512ifma-intrinsics.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/avx512ifma-intrinsics.ll Tue Feb 19 07:57:09 2019
> @@ -219,9 +219,7 @@ define <8 x i64>@test_int_x86_avx512_vpm
> ; X86-LABEL: test_int_x86_avx512_vpmadd52h_uq_512_load_bcast:
> ; X86: # %bb.0:
> ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
> -; X86-NEXT: vmovq (%eax), %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x10]
> -; X86-NEXT: # xmm2 = mem[0],zero
> -; X86-NEXT: vpbroadcastq %xmm2, %zmm2 # encoding: [0x62,0xf2,0xfd,0x48,0x59,0xd2]
> +; X86-NEXT: vpbroadcastq (%eax), %zmm2 # encoding: [0x62,0xf2,0xfd,0x48,0x59,0x10]
> ; X86-NEXT: vpmadd52huq %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0xb5,0xc2]
> ; X86-NEXT: retl # encoding: [0xc3]
> ;
> @@ -258,9 +256,7 @@ define <8 x i64>@test_int_x86_avx512_vpm
> ; X86-LABEL: test_int_x86_avx512_vpmadd52h_uq_512_load_commute_bcast:
> ; X86: # %bb.0:
> ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
> -; X86-NEXT: vmovq (%eax), %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x10]
> -; X86-NEXT: # xmm2 = mem[0],zero
> -; X86-NEXT: vpbroadcastq %xmm2, %zmm2 # encoding: [0x62,0xf2,0xfd,0x48,0x59,0xd2]
> +; X86-NEXT: vpbroadcastq (%eax), %zmm2 # encoding: [0x62,0xf2,0xfd,0x48,0x59,0x10]
> ; X86-NEXT: vpmadd52huq %zmm1, %zmm2, %zmm0 # encoding: [0x62,0xf2,0xed,0x48,0xb5,0xc1]
> ; X86-NEXT: retl # encoding: [0xc3]
> ;
> @@ -302,9 +298,7 @@ define <8 x i64>@test_int_x86_avx512_mas
> ; X86-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_512_load_bcast:
> ; X86: # %bb.0:
> ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
> -; X86-NEXT: vmovq (%eax), %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x10]
> -; X86-NEXT: # xmm2 = mem[0],zero
> -; X86-NEXT: vpbroadcastq %xmm2, %zmm2 # encoding: [0x62,0xf2,0xfd,0x48,0x59,0xd2]
> +; X86-NEXT: vpbroadcastq (%eax), %zmm2 # encoding: [0x62,0xf2,0xfd,0x48,0x59,0x10]
> ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
> ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
> ; X86-NEXT: vpmadd52huq %zmm2, %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0xb5,0xc2]
> @@ -351,9 +345,7 @@ define <8 x i64>@test_int_x86_avx512_mas
> ; X86-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_512_load_commute_bcast:
> ; X86: # %bb.0:
> ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
> -; X86-NEXT: vmovq (%eax), %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x10]
> -; X86-NEXT: # xmm2 = mem[0],zero
> -; X86-NEXT: vpbroadcastq %xmm2, %zmm2 # encoding: [0x62,0xf2,0xfd,0x48,0x59,0xd2]
> +; X86-NEXT: vpbroadcastq (%eax), %zmm2 # encoding: [0x62,0xf2,0xfd,0x48,0x59,0x10]
> ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
> ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
> ; X86-NEXT: vpmadd52huq %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x49,0xb5,0xc1]
> @@ -400,9 +392,7 @@ define <8 x i64>@test_int_x86_avx512_mas
> ; X86-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_512_load_bcast:
> ; X86: # %bb.0:
> ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
> -; X86-NEXT: vmovq (%eax), %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x10]
> -; X86-NEXT: # xmm2 = mem[0],zero
> -; X86-NEXT: vpbroadcastq %xmm2, %zmm2 # encoding: [0x62,0xf2,0xfd,0x48,0x59,0xd2]
> +; X86-NEXT: vpbroadcastq (%eax), %zmm2 # encoding: [0x62,0xf2,0xfd,0x48,0x59,0x10]
> ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
> ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
> ; X86-NEXT: vpmadd52huq %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xb5,0xc2]
> @@ -449,9 +439,7 @@ define <8 x i64>@test_int_x86_avx512_mas
> ; X86-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_512_load_commute_bcast:
> ; X86: # %bb.0:
> ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
> -; X86-NEXT: vmovq (%eax), %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x10]
> -; X86-NEXT: # xmm2 = mem[0],zero
> -; X86-NEXT: vpbroadcastq %xmm2, %zmm2 # encoding: [0x62,0xf2,0xfd,0x48,0x59,0xd2]
> +; X86-NEXT: vpbroadcastq (%eax), %zmm2 # encoding: [0x62,0xf2,0xfd,0x48,0x59,0x10]
> ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
> ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
> ; X86-NEXT: vpmadd52huq %zmm1, %zmm2, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xed,0xc9,0xb5,0xc1]
>
> Modified: llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll?rev=354340&r1=354339&r2=354340&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll Tue Feb 19 07:57:09 2019
> @@ -40,15 +40,13 @@ declare <2 x i64> @llvm.x86.avx512.mask.
> define <2 x i64>@test_int_x86_avx512_mask_pbroadcast_q_gpr_128(i64 %x0, <2 x i64> %x1, i8 %mask) {
> ; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_q_gpr_128:
> ; X86: # %bb.0:
> -; X86-NEXT: vmovq {{[0-9]+}}(%esp), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x4c,0x24,0x04]
> -; X86-NEXT: # xmm1 = mem[0],zero
> -; X86-NEXT: vpbroadcastq %xmm1, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x59,0xd1]
> +; X86-NEXT: vpbroadcastq {{[0-9]+}}(%esp), %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x59,0x4c,0x24,0x04]
> ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x0c]
> ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
> -; X86-NEXT: vpbroadcastq %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x59,0xc1]
> -; X86-NEXT: vpbroadcastq %xmm1, %xmm1 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x59,0xc9]
> -; X86-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc1]
> -; X86-NEXT: vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
> +; X86-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0xc1]
> +; X86-NEXT: vmovdqa64 %xmm1, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x6f,0xd1]
> +; X86-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
> +; X86-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
> ; X86-NEXT: retl # encoding: [0xc3]
> ;
> ; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_q_gpr_128:
> @@ -106,15 +104,13 @@ define <2 x i64>@test_int_x86_avx512_mas
> define <4 x i64>@test_int_x86_avx512_mask_pbroadcast_q_gpr_256(i64 %x0, <4 x i64> %x1, i8 %mask) {
> ; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_q_gpr_256:
> ; X86: # %bb.0:
> -; X86-NEXT: vmovq {{[0-9]+}}(%esp), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x4c,0x24,0x04]
> -; X86-NEXT: # xmm1 = mem[0],zero
> -; X86-NEXT: vpbroadcastq %xmm1, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x59,0xd1]
> +; X86-NEXT: vpbroadcastq {{[0-9]+}}(%esp), %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x59,0x4c,0x24,0x04]
> ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x0c]
> ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
> -; X86-NEXT: vpbroadcastq %xmm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x59,0xc1]
> -; X86-NEXT: vpbroadcastq %xmm1, %ymm1 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x59,0xc9]
> -; X86-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc1]
> -; X86-NEXT: vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
> +; X86-NEXT: vmovdqa64 %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x6f,0xc1]
> +; X86-NEXT: vmovdqa64 %ymm1, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x6f,0xd1]
> +; X86-NEXT: vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
> +; X86-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
> ; X86-NEXT: retl # encoding: [0xc3]
> ;
> ; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_q_gpr_256:
> @@ -3910,9 +3906,7 @@ define <2 x i64> @test_mask_andnot_epi64
> ; X86-LABEL: test_mask_andnot_epi64_rmb_128:
> ; X86: # %bb.0:
> ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
> -; X86-NEXT: vmovddup (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0x08]
> -; X86-NEXT: # xmm1 = mem[0,0]
> -; X86-NEXT: vandnps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x55,0xc1]
> +; X86-NEXT: vpandnq (%eax){1to2}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x18,0xdf,0x00]
> ; X86-NEXT: retl # encoding: [0xc3]
> ;
> ; X64-LABEL: test_mask_andnot_epi64_rmb_128:
> @@ -4079,10 +4073,7 @@ define <4 x i64> @test_mask_andnot_epi64
> ; X86-LABEL: test_mask_andnot_epi64_rmb_256:
> ; X86: # %bb.0:
> ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
> -; X86-NEXT: vmovsd (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x08]
> -; X86-NEXT: # xmm1 = mem[0],zero
> -; X86-NEXT: vbroadcastsd %xmm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x19,0xc9]
> -; X86-NEXT: vandnps %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x55,0xc1]
> +; X86-NEXT: vpandnq (%eax){1to4}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x38,0xdf,0x00]
> ; X86-NEXT: retl # encoding: [0xc3]
> ;
> ; X64-LABEL: test_mask_andnot_epi64_rmb_256:
> @@ -4100,9 +4091,7 @@ define <4 x i64> @test_mask_andnot_epi64
> ; X86-LABEL: test_mask_andnot_epi64_rmbk_256:
> ; X86: # %bb.0:
> ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
> -; X86-NEXT: vmovq (%eax), %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x10]
> -; X86-NEXT: # xmm2 = mem[0],zero
> -; X86-NEXT: vpbroadcastq %xmm2, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x59,0xd2]
> +; X86-NEXT: vpbroadcastq (%eax), %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x59,0x10]
> ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
> ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
> ; X86-NEXT: vpandnq %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xdf,0xca]
> @@ -4126,9 +4115,7 @@ define <4 x i64> @test_mask_andnot_epi64
> ; X86-LABEL: test_mask_andnot_epi64_rmbkz_256:
> ; X86: # %bb.0:
> ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
> -; X86-NEXT: vmovq (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x08]
> -; X86-NEXT: # xmm1 = mem[0],zero
> -; X86-NEXT: vpbroadcastq %xmm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x59,0xc9]
> +; X86-NEXT: vpbroadcastq (%eax), %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x59,0x08]
> ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
> ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
> ; X86-NEXT: vpandnq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xdf,0xc1]
>
> Modified: llvm/trunk/test/CodeGen/X86/i64-mem-copy.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/i64-mem-copy.ll?rev=354340&r1=354339&r2=354340&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/i64-mem-copy.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/i64-mem-copy.ll Tue Feb 19 07:57:09 2019
> @@ -158,9 +158,9 @@ define void @PR23476(<5 x i64> %in, i64*
> ; X32AVX-NEXT: movl %esp, %ebp
> ; X32AVX-NEXT: andl $-64, %esp
> ; X32AVX-NEXT: subl $128, %esp
> +; X32AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
> ; X32AVX-NEXT: movl 52(%ebp), %eax
> ; X32AVX-NEXT: andl $7, %eax
> -; X32AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
> ; X32AVX-NEXT: movl 48(%ebp), %ecx
> ; X32AVX-NEXT: vmovups 8(%ebp), %ymm1
> ; X32AVX-NEXT: vmovaps %ymm1, (%esp)
>
> Modified: llvm/trunk/test/CodeGen/X86/insertelement-shuffle.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/insertelement-shuffle.ll?rev=354340&r1=354339&r2=354340&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/insertelement-shuffle.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/insertelement-shuffle.ll Tue Feb 19 07:57:09 2019
> @@ -81,8 +81,7 @@ define <8 x i64> @insert_subvector_512(i
> define <8 x i64> @insert_subvector_into_undef(i32 %x0, i32 %x1) nounwind {
> ; X86_AVX256-LABEL: insert_subvector_into_undef:
> ; X86_AVX256: # %bb.0:
> -; X86_AVX256-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
> -; X86_AVX256-NEXT: vbroadcastsd %xmm0, %ymm0
> +; X86_AVX256-NEXT: vbroadcastsd {{[0-9]+}}(%esp), %ymm0
> ; X86_AVX256-NEXT: vmovaps %ymm0, %ymm1
> ; X86_AVX256-NEXT: retl
> ;
>
> Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-xop.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-xop.ll?rev=354340&r1=354339&r2=354340&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-xop.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-xop.ll Tue Feb 19 07:57:09 2019
> @@ -219,23 +219,13 @@ define <4 x i32> @combine_vpperm_10zz32B
> ret <4 x i32> %res3
> }
>
> -; FIXME: Duplicated load in i686
> define void @buildvector_v4f32_0404(float %a, float %b, <4 x float>* %ptr) {
> -; X86-AVX-LABEL: buildvector_v4f32_0404:
> -; X86-AVX: # %bb.0:
> -; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
> -; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
> -; X86-AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
> -; X86-AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
> -; X86-AVX-NEXT: vmovaps %xmm0, (%eax)
> -; X86-AVX-NEXT: retl
> -;
> -; X86-AVX2-LABEL: buildvector_v4f32_0404:
> -; X86-AVX2: # %bb.0:
> -; X86-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
> -; X86-AVX2-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
> -; X86-AVX2-NEXT: vmovaps %xmm0, (%eax)
> -; X86-AVX2-NEXT: retl
> +; X86-LABEL: buildvector_v4f32_0404:
> +; X86: # %bb.0:
> +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
> +; X86-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
> +; X86-NEXT: vmovaps %xmm0, (%eax)
> +; X86-NEXT: retl
> ;
> ; X64-AVX-LABEL: buildvector_v4f32_0404:
> ; X64-AVX: # %bb.0:
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
-------------- next part --------------
A non-text attachment was scrubbed...
Name: bugpoint-reduced-simplified.ll
Type: application/octet-stream
Size: 21519 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20190226/a3731264/attachment.obj>
More information about the llvm-commits mailing list