[llvm] e431b28 - [DAG] CombineConsecutiveLoads - replace getABITypeAlign with allowsMemoryAccess (PR45116)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 24 04:37:27 PDT 2021
Author: Simon Pilgrim
Date: 2021-08-24T12:31:22+01:00
New Revision: e431b280c9aedfd405ec248fbb934bd88863dd2c
URL: https://github.com/llvm/llvm-project/commit/e431b280c9aedfd405ec248fbb934bd88863dd2c
DIFF: https://github.com/llvm/llvm-project/commit/e431b280c9aedfd405ec248fbb934bd88863dd2c.diff
LOG: [DAG] CombineConsecutiveLoads - replace getABITypeAlign with allowsMemoryAccess (PR45116)
This addresses one of the cases identified in PR45116: we don't need to limit load combines (here, for ISD::BUILD_PAIR) to ABI alignment. Instead we can use allowsMemoryAccess, which tests the access against getABITypeAlign but also falls back to allowsMisalignedMemoryAccesses to check whether the target permits (fast) misaligned memory loads.
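For reference, a minimal standalone sketch of the two-step decision that allowsMemoryAccess performs; this is an illustration only, not the LLVM implementation, and the names TargetInfoSketch / allowsAccessSketch are invented for the example:

    #include <cstdint>

    // Hypothetical stand-in for the target hook
    // (allowsMisalignedMemoryAccesses in LLVM).
    struct TargetInfoSketch {
      bool misalignedAccessAllowed(unsigned Bytes, unsigned Align,
                                   bool &Fast) const {
        (void)Align;
        // Assumption for illustration: small misaligned scalar
        // accesses are legal and fast (roughly x86-like behavior).
        Fast = Bytes <= 8;
        return true;
      }
    };

    // Sketch of the combined check: accept accesses that meet the
    // ABI type alignment outright, otherwise defer to the target's
    // misaligned-access hook, which also reports whether it is fast.
    bool allowsAccessSketch(const TargetInfoSketch &TI,
                            unsigned AccessBytes, unsigned AccessAlign,
                            unsigned ABIAlign, bool &Fast) {
      if (AccessAlign >= ABIAlign) { // naturally aligned: always OK
        Fast = true;
        return true;
      }
      return TI.misalignedAccessAllowed(AccessBytes, AccessAlign, Fast);
    }

The combine in this patch only fires when the check succeeds and reports the access as fast, mirroring the LD1Fast flag in the diff below.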
This particularly helps 32-bit x86 cases that load 64-bit-sized data, reducing codegen differences versus x86_64.
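As a hedged illustration of the kind of source pattern that benefits (the function name and exact codegen are assumptions, not taken from the commit): on a 32-bit x86 target, a 64-bit value assembled from two adjacent 32-bit loads forms an ISD::BUILD_PAIR of loads, which this combine may now merge into a single 8-byte load even though the pointer is only 4-byte aligned, since x86 reports fast misaligned access. The kmovq changes in the tests below show the same effect for 64-bit mask arguments.

    #include <cstdint>

    // Two adjacent 4-byte loads that together form one 8-byte value.
    // Previously the combine was blocked because the i64 ABI alignment
    // (8 on many targets) exceeded the 4-byte pointer alignment.
    uint64_t load_pair(const uint32_t *p) {
      uint64_t lo = p[0];      // load of the low half
      uint64_t hi = p[1];      // consecutive load of the high half
      return (hi << 32) | lo;  // recombined into one 64-bit value
    }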
Differential Revision: https://reviews.llvm.org/D108307
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/test/CodeGen/X86/avx512-mask-op.ll
llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
llvm/test/CodeGen/X86/avx512vbmi2-intrinsics-upgrade.ll
llvm/test/CodeGen/X86/avx512vbmi2-intrinsics.ll
llvm/test/CodeGen/X86/pr35982.ll
llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll
llvm/test/CodeGen/X86/vector-shuffle-combining-avx512vbmi.ll
llvm/test/CodeGen/X86/xmulo.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 839787e381153..5a59c50bc7aa1 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -12566,18 +12566,15 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
LD1->getAddressSpace() != LD2->getAddressSpace())
return SDValue();
+ bool LD1Fast = false;
EVT LD1VT = LD1->getValueType(0);
unsigned LD1Bytes = LD1VT.getStoreSize();
- if (DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) {
- Align Alignment = LD1->getAlign();
- Align NewAlign = DAG.getDataLayout().getABITypeAlign(
- VT.getTypeForEVT(*DAG.getContext()));
-
- if (NewAlign <= Alignment &&
- (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
- return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
- LD1->getPointerInfo(), Alignment);
- }
+ if ((!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
+ DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1) &&
+ TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
+ *LD1->getMemOperand(), &LD1Fast) && LD1Fast)
+ return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
+ LD1->getPointerInfo(), LD1->getAlign());
return SDValue();
}
diff --git a/llvm/test/CodeGen/X86/avx512-mask-op.ll b/llvm/test/CodeGen/X86/avx512-mask-op.ll
index 6ebe6a36398a6..11dee95976e97 100644
--- a/llvm/test/CodeGen/X86/avx512-mask-op.ll
+++ b/llvm/test/CodeGen/X86/avx512-mask-op.ll
@@ -1271,8 +1271,8 @@ define <64 x i8> @test17(i64 %x, i32 %y, i32 %z) {
;
; X86-LABEL: test17:
; X86: ## %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k0
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax
; X86-NEXT: setg %al
; X86-NEXT: kshiftrq $6, %k0, %k1
diff --git a/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
index ac346c502eb8c..d47e7bee65b96 100644
--- a/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
+++ b/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
@@ -49,8 +49,8 @@ declare <64 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.512(i8, <64 x i8>, i64)
define <64 x i8> @test_int_x86_avx512_mask_pbroadcast_b_gpr_512(i8 %x0, <64 x i8> %x1, i64 %mask) nounwind {
; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_512:
; X86: # %bb.0:
-; X86-NEXT: vpbroadcastb {{[0-9]+}}(%esp), %zmm1 # encoding: [0x62,0xf2,0x7d,0x48,0x78,0x4c,0x24,0x04]
; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
+; X86-NEXT: vpbroadcastb {{[0-9]+}}(%esp), %zmm1 # encoding: [0x62,0xf2,0x7d,0x48,0x78,0x4c,0x24,0x04]
; X86-NEXT: vmovdqu8 %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf1,0x7f,0x49,0x6f,0xc1]
; X86-NEXT: vmovdqu8 %zmm1, %zmm2 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xc9,0x6f,0xd1]
; X86-NEXT: vpaddb %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfc,0xc2]
@@ -109,8 +109,8 @@ define void @test_int_x86_avx512_mask_storeu_b_512(i8* %ptr1, i8* %ptr2, <64 x i
; X86-LABEL: test_int_x86_avx512_mask_storeu_b_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x0c]
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT: vmovdqu8 %zmm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0x7f,0x49,0x7f,0x01]
; X86-NEXT: vmovdqu64 %zmm0, (%eax) # encoding: [0x62,0xf1,0xfe,0x48,0x7f,0x00]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
@@ -188,9 +188,9 @@ define <64 x i8> @test_int_x86_avx512_mask_loadu_b_512(i8* %ptr, i8* %ptr2, <64
; X86-LABEL: test_int_x86_avx512_mask_loadu_b_512:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
+; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x0c]
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT: vmovdqu64 (%ecx), %zmm0 # encoding: [0x62,0xf1,0xfe,0x48,0x6f,0x01]
-; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x0c]
; X86-NEXT: vmovdqu8 (%eax), %zmm0 {%k1} # encoding: [0x62,0xf1,0x7f,0x49,0x6f,0x00]
; X86-NEXT: vmovdqu8 (%ecx), %zmm1 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xc9,0x6f,0x09]
; X86-NEXT: vpaddb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfc,0xc1]
@@ -1937,66 +1937,47 @@ define i64 @test_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1) nounwind {
define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) nounwind {
; X86-LABEL: test_mask_cmp_b_512:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebp # encoding: [0x55]
-; X86-NEXT: pushl %ebx # encoding: [0x53]
; X86-NEXT: pushl %edi # encoding: [0x57]
; X86-NEXT: pushl %esi # encoding: [0x56]
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi # encoding: [0x8b,0x74,0x24,0x14]
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x18]
-; X86-NEXT: vpcmpeqb %zmm1, %zmm0, %k2 # encoding: [0x62,0xf1,0x7d,0x48,0x74,0xd1]
-; X86-NEXT: kmovd %esi, %k0 # encoding: [0xc5,0xfb,0x92,0xc6]
-; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
-; X86-NEXT: kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20]
-; X86-NEXT: kandd %k1, %k3, %k3 # encoding: [0xc4,0xe1,0xe5,0x41,0xd9]
-; X86-NEXT: kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
-; X86-NEXT: kandd %k0, %k2, %k2 # encoding: [0xc4,0xe1,0xed,0x41,0xd0]
+; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x0c]
+; X86-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x74,0xc1]
+; X86-NEXT: kshiftrq $32, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x31,0xd0,0x20]
+; X86-NEXT: kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
+; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
+; X86-NEXT: vpcmpgtb %zmm0, %zmm1, %k0 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x64,0xc0]
+; X86-NEXT: kshiftrq $32, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x31,0xd0,0x20]
; X86-NEXT: kmovd %k2, %edx # encoding: [0xc5,0xfb,0x93,0xd2]
-; X86-NEXT: vpcmpgtb %zmm0, %zmm1, %k2 # encoding: [0x62,0xf1,0x75,0x48,0x64,0xd0]
-; X86-NEXT: kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20]
-; X86-NEXT: kandd %k1, %k3, %k3 # encoding: [0xc4,0xe1,0xe5,0x41,0xd9]
-; X86-NEXT: kmovd %k3, %edi # encoding: [0xc5,0xfb,0x93,0xfb]
-; X86-NEXT: kandd %k0, %k2, %k2 # encoding: [0xc4,0xe1,0xed,0x41,0xd0]
-; X86-NEXT: kmovd %k2, %ebx # encoding: [0xc5,0xfb,0x93,0xda]
-; X86-NEXT: addl %edx, %ebx # encoding: [0x01,0xd3]
-; X86-NEXT: adcl %eax, %edi # encoding: [0x11,0xc7]
-; X86-NEXT: vpcmpleb %zmm1, %zmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xd1,0x02]
-; X86-NEXT: kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20]
-; X86-NEXT: kandd %k1, %k3, %k3 # encoding: [0xc4,0xe1,0xe5,0x41,0xd9]
-; X86-NEXT: kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
-; X86-NEXT: kandd %k0, %k2, %k2 # encoding: [0xc4,0xe1,0xed,0x41,0xd0]
+; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
+; X86-NEXT: addl %ecx, %esi # encoding: [0x01,0xce]
+; X86-NEXT: adcl %eax, %edx # encoding: [0x11,0xc2]
+; X86-NEXT: vpcmpleb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3f,0xc1,0x02]
+; X86-NEXT: kshiftrq $32, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x31,0xd0,0x20]
+; X86-NEXT: kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
+; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
+; X86-NEXT: addl %esi, %ecx # encoding: [0x01,0xf1]
+; X86-NEXT: adcl %edx, %eax # encoding: [0x11,0xd0]
+; X86-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3f,0xc1,0x04]
+; X86-NEXT: kshiftrq $32, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x31,0xd0,0x20]
; X86-NEXT: kmovd %k2, %edx # encoding: [0xc5,0xfb,0x93,0xd2]
-; X86-NEXT: addl %ebx, %edx # encoding: [0x01,0xda]
-; X86-NEXT: adcl %edi, %eax # encoding: [0x11,0xf8]
-; X86-NEXT: vpcmpneqb %zmm1, %zmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xd1,0x04]
-; X86-NEXT: kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20]
-; X86-NEXT: kandd %k1, %k3, %k3 # encoding: [0xc4,0xe1,0xe5,0x41,0xd9]
-; X86-NEXT: kmovd %k3, %edi # encoding: [0xc5,0xfb,0x93,0xfb]
-; X86-NEXT: kandd %k0, %k2, %k2 # encoding: [0xc4,0xe1,0xed,0x41,0xd0]
-; X86-NEXT: kmovd %k2, %ebx # encoding: [0xc5,0xfb,0x93,0xda]
-; X86-NEXT: addl %edx, %ebx # encoding: [0x01,0xd3]
-; X86-NEXT: adcl %eax, %edi # encoding: [0x11,0xc7]
-; X86-NEXT: vpcmpnltb %zmm1, %zmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xd1,0x05]
-; X86-NEXT: kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20]
-; X86-NEXT: kandd %k1, %k3, %k3 # encoding: [0xc4,0xe1,0xe5,0x41,0xd9]
-; X86-NEXT: kmovd %k3, %ebp # encoding: [0xc5,0xfb,0x93,0xeb]
-; X86-NEXT: kandd %k0, %k2, %k2 # encoding: [0xc4,0xe1,0xed,0x41,0xd0]
+; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
+; X86-NEXT: addl %ecx, %esi # encoding: [0x01,0xce]
+; X86-NEXT: adcl %eax, %edx # encoding: [0x11,0xc2]
+; X86-NEXT: vpcmpnltb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3f,0xc1,0x05]
+; X86-NEXT: kshiftrq $32, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x31,0xd0,0x20]
; X86-NEXT: kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca]
-; X86-NEXT: addl %ebx, %ecx # encoding: [0x01,0xd9]
-; X86-NEXT: adcl %edi, %ebp # encoding: [0x11,0xfd]
-; X86-NEXT: vpcmpgtb %zmm1, %zmm0, %k2 # encoding: [0x62,0xf1,0x7d,0x48,0x64,0xd1]
-; X86-NEXT: kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20]
-; X86-NEXT: kandd %k1, %k3, %k1 # encoding: [0xc4,0xe1,0xe5,0x41,0xc9]
+; X86-NEXT: kmovd %k0, %edi # encoding: [0xc5,0xfb,0x93,0xf8]
+; X86-NEXT: addl %esi, %edi # encoding: [0x01,0xf7]
+; X86-NEXT: adcl %edx, %ecx # encoding: [0x11,0xd1]
+; X86-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x64,0xc1]
+; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
; X86-NEXT: kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1]
-; X86-NEXT: kandd %k0, %k2, %k0 # encoding: [0xc4,0xe1,0xed,0x41,0xc0]
; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
-; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8]
-; X86-NEXT: adcl %ebp, %edx # encoding: [0x11,0xea]
-; X86-NEXT: addl %esi, %eax # encoding: [0x01,0xf0]
-; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx # encoding: [0x13,0x54,0x24,0x18]
+; X86-NEXT: addl %edi, %eax # encoding: [0x01,0xf8]
+; X86-NEXT: adcl %ecx, %edx # encoding: [0x11,0xca]
+; X86-NEXT: addl {{[0-9]+}}(%esp), %eax # encoding: [0x03,0x44,0x24,0x0c]
+; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx # encoding: [0x13,0x54,0x24,0x10]
; X86-NEXT: popl %esi # encoding: [0x5e]
; X86-NEXT: popl %edi # encoding: [0x5f]
-; X86-NEXT: popl %ebx # encoding: [0x5b]
-; X86-NEXT: popl %ebp # encoding: [0x5d]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
@@ -2131,66 +2112,47 @@ define i64 @test_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1) nounwind {
define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) nounwind {
; X86-LABEL: test_mask_x86_avx512_ucmp_b_512:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebp # encoding: [0x55]
-; X86-NEXT: pushl %ebx # encoding: [0x53]
; X86-NEXT: pushl %edi # encoding: [0x57]
; X86-NEXT: pushl %esi # encoding: [0x56]
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi # encoding: [0x8b,0x74,0x24,0x14]
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x18]
-; X86-NEXT: vpcmpeqb %zmm1, %zmm0, %k2 # encoding: [0x62,0xf1,0x7d,0x48,0x74,0xd1]
-; X86-NEXT: kmovd %esi, %k0 # encoding: [0xc5,0xfb,0x92,0xc6]
-; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
-; X86-NEXT: kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20]
-; X86-NEXT: kandd %k1, %k3, %k3 # encoding: [0xc4,0xe1,0xe5,0x41,0xd9]
-; X86-NEXT: kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
-; X86-NEXT: kandd %k0, %k2, %k2 # encoding: [0xc4,0xe1,0xed,0x41,0xd0]
+; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x0c]
+; X86-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x74,0xc1]
+; X86-NEXT: kshiftrq $32, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x31,0xd0,0x20]
+; X86-NEXT: kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
+; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
+; X86-NEXT: vpcmpltub %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3e,0xc1,0x01]
+; X86-NEXT: kshiftrq $32, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x31,0xd0,0x20]
; X86-NEXT: kmovd %k2, %edx # encoding: [0xc5,0xfb,0x93,0xd2]
-; X86-NEXT: vpcmpltub %zmm1, %zmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xd1,0x01]
-; X86-NEXT: kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20]
-; X86-NEXT: kandd %k1, %k3, %k3 # encoding: [0xc4,0xe1,0xe5,0x41,0xd9]
-; X86-NEXT: kmovd %k3, %edi # encoding: [0xc5,0xfb,0x93,0xfb]
-; X86-NEXT: kandd %k0, %k2, %k2 # encoding: [0xc4,0xe1,0xed,0x41,0xd0]
-; X86-NEXT: kmovd %k2, %ebx # encoding: [0xc5,0xfb,0x93,0xda]
-; X86-NEXT: addl %edx, %ebx # encoding: [0x01,0xd3]
-; X86-NEXT: adcl %eax, %edi # encoding: [0x11,0xc7]
-; X86-NEXT: vpcmpleub %zmm1, %zmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xd1,0x02]
-; X86-NEXT: kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20]
-; X86-NEXT: kandd %k1, %k3, %k3 # encoding: [0xc4,0xe1,0xe5,0x41,0xd9]
-; X86-NEXT: kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
-; X86-NEXT: kandd %k0, %k2, %k2 # encoding: [0xc4,0xe1,0xed,0x41,0xd0]
+; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
+; X86-NEXT: addl %ecx, %esi # encoding: [0x01,0xce]
+; X86-NEXT: adcl %eax, %edx # encoding: [0x11,0xc2]
+; X86-NEXT: vpcmpleub %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3e,0xc1,0x02]
+; X86-NEXT: kshiftrq $32, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x31,0xd0,0x20]
+; X86-NEXT: kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
+; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
+; X86-NEXT: addl %esi, %ecx # encoding: [0x01,0xf1]
+; X86-NEXT: adcl %edx, %eax # encoding: [0x11,0xd0]
+; X86-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3f,0xc1,0x04]
+; X86-NEXT: kshiftrq $32, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x31,0xd0,0x20]
; X86-NEXT: kmovd %k2, %edx # encoding: [0xc5,0xfb,0x93,0xd2]
-; X86-NEXT: addl %ebx, %edx # encoding: [0x01,0xda]
-; X86-NEXT: adcl %edi, %eax # encoding: [0x11,0xf8]
-; X86-NEXT: vpcmpneqb %zmm1, %zmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xd1,0x04]
-; X86-NEXT: kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20]
-; X86-NEXT: kandd %k1, %k3, %k3 # encoding: [0xc4,0xe1,0xe5,0x41,0xd9]
-; X86-NEXT: kmovd %k3, %edi # encoding: [0xc5,0xfb,0x93,0xfb]
-; X86-NEXT: kandd %k0, %k2, %k2 # encoding: [0xc4,0xe1,0xed,0x41,0xd0]
-; X86-NEXT: kmovd %k2, %ebx # encoding: [0xc5,0xfb,0x93,0xda]
-; X86-NEXT: addl %edx, %ebx # encoding: [0x01,0xd3]
-; X86-NEXT: adcl %eax, %edi # encoding: [0x11,0xc7]
-; X86-NEXT: vpcmpnltub %zmm1, %zmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xd1,0x05]
-; X86-NEXT: kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20]
-; X86-NEXT: kandd %k1, %k3, %k3 # encoding: [0xc4,0xe1,0xe5,0x41,0xd9]
-; X86-NEXT: kmovd %k3, %ebp # encoding: [0xc5,0xfb,0x93,0xeb]
-; X86-NEXT: kandd %k0, %k2, %k2 # encoding: [0xc4,0xe1,0xed,0x41,0xd0]
+; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
+; X86-NEXT: addl %ecx, %esi # encoding: [0x01,0xce]
+; X86-NEXT: adcl %eax, %edx # encoding: [0x11,0xc2]
+; X86-NEXT: vpcmpnltub %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3e,0xc1,0x05]
+; X86-NEXT: kshiftrq $32, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x31,0xd0,0x20]
; X86-NEXT: kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca]
-; X86-NEXT: addl %ebx, %ecx # encoding: [0x01,0xd9]
-; X86-NEXT: adcl %edi, %ebp # encoding: [0x11,0xfd]
-; X86-NEXT: vpcmpnleub %zmm1, %zmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xd1,0x06]
-; X86-NEXT: kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20]
-; X86-NEXT: kandd %k1, %k3, %k1 # encoding: [0xc4,0xe1,0xe5,0x41,0xc9]
+; X86-NEXT: kmovd %k0, %edi # encoding: [0xc5,0xfb,0x93,0xf8]
+; X86-NEXT: addl %esi, %edi # encoding: [0x01,0xf7]
+; X86-NEXT: adcl %edx, %ecx # encoding: [0x11,0xd1]
+; X86-NEXT: vpcmpnleub %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3e,0xc1,0x06]
+; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
; X86-NEXT: kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1]
-; X86-NEXT: kandd %k0, %k2, %k0 # encoding: [0xc4,0xe1,0xed,0x41,0xc0]
; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
-; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8]
-; X86-NEXT: adcl %ebp, %edx # encoding: [0x11,0xea]
-; X86-NEXT: addl %esi, %eax # encoding: [0x01,0xf0]
-; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx # encoding: [0x13,0x54,0x24,0x18]
+; X86-NEXT: addl %edi, %eax # encoding: [0x01,0xf8]
+; X86-NEXT: adcl %ecx, %edx # encoding: [0x11,0xca]
+; X86-NEXT: addl {{[0-9]+}}(%esp), %eax # encoding: [0x03,0x44,0x24,0x0c]
+; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx # encoding: [0x13,0x54,0x24,0x10]
; X86-NEXT: popl %esi # encoding: [0x5e]
; X86-NEXT: popl %edi # encoding: [0x5f]
-; X86-NEXT: popl %ebx # encoding: [0x5b]
-; X86-NEXT: popl %ebp # encoding: [0x5d]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
diff --git a/llvm/test/CodeGen/X86/avx512vbmi2-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512vbmi2-intrinsics-upgrade.ll
index dc34be5a7b836..eb30f0cb80e92 100644
--- a/llvm/test/CodeGen/X86/avx512vbmi2-intrinsics-upgrade.ll
+++ b/llvm/test/CodeGen/X86/avx512vbmi2-intrinsics-upgrade.ll
@@ -283,8 +283,8 @@ declare <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16> %data, <32 x
define void @test_mask_compress_store_b_512(i8* %addr, <64 x i8> %data, i64 %mask) {
; X86-LABEL: test_mask_compress_store_b_512:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpcompressb %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0x00]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
diff --git a/llvm/test/CodeGen/X86/avx512vbmi2-intrinsics.ll b/llvm/test/CodeGen/X86/avx512vbmi2-intrinsics.ll
index c1e164252249b..e6db088c00d04 100644
--- a/llvm/test/CodeGen/X86/avx512vbmi2-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512vbmi2-intrinsics.ll
@@ -287,8 +287,8 @@ define <32 x i16> @test_compress_w_512(<32 x i16> %data) {
define void @test_mask_compress_store_b_512(i8* %addr, <64 x i8> %data, i64 %mask) {
; X86-LABEL: test_mask_compress_store_b_512:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpcompressb %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0x00]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
diff --git a/llvm/test/CodeGen/X86/pr35982.ll b/llvm/test/CodeGen/X86/pr35982.ll
index 623fcc650a345..4a79a109f8b60 100644
--- a/llvm/test/CodeGen/X86/pr35982.ll
+++ b/llvm/test/CodeGen/X86/pr35982.ll
@@ -1,29 +1,37 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+mmx,+3dnowa -post-RA-scheduler=false | FileCheck %s
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+mmx,+3dnowa -post-RA-scheduler=true | FileCheck %s
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+mmx,+3dnowa -post-RA-scheduler=false | FileCheck %s --check-prefix=NO-POSTRA
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+mmx,+3dnowa -post-RA-scheduler=true | FileCheck %s --check-prefix=POSTRA
define float @PR35982_emms(<1 x i64>) nounwind {
-; CHECK-LABEL: PR35982_emms:
-; CHECK: # %bb.0:
-; CHECK-NEXT: pushl %ebp
-; CHECK-NEXT: movl %esp, %ebp
-; CHECK-NEXT: andl $-8, %esp
-; CHECK-NEXT: subl $16, %esp
-; CHECK-NEXT: movl 8(%ebp), %eax
-; CHECK-NEXT: movl 12(%ebp), %ecx
-; CHECK-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; CHECK-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; CHECK-NEXT: punpckhdq %mm0, %mm0 # mm0 = mm0[1,1]
-; CHECK-NEXT: movd %mm0, %ecx
-; CHECK-NEXT: emms
-; CHECK-NEXT: movl %eax, (%esp)
-; CHECK-NEXT: fildl (%esp)
-; CHECK-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; CHECK-NEXT: fiaddl {{[0-9]+}}(%esp)
-; CHECK-NEXT: movl %ebp, %esp
-; CHECK-NEXT: popl %ebp
-; CHECK-NEXT: retl
+; NO-POSTRA-LABEL: PR35982_emms:
+; NO-POSTRA: # %bb.0:
+; NO-POSTRA-NEXT: subl $8, %esp
+; NO-POSTRA-NEXT: movl {{[0-9]+}}(%esp), %eax
+; NO-POSTRA-NEXT: movq {{[0-9]+}}(%esp), %mm0
+; NO-POSTRA-NEXT: punpckhdq %mm0, %mm0 # mm0 = mm0[1,1]
+; NO-POSTRA-NEXT: movd %mm0, %ecx
+; NO-POSTRA-NEXT: emms
+; NO-POSTRA-NEXT: movl %eax, (%esp)
+; NO-POSTRA-NEXT: fildl (%esp)
+; NO-POSTRA-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; NO-POSTRA-NEXT: fiaddl {{[0-9]+}}(%esp)
+; NO-POSTRA-NEXT: addl $8, %esp
+; NO-POSTRA-NEXT: retl
+;
+; POSTRA-LABEL: PR35982_emms:
+; POSTRA: # %bb.0:
+; POSTRA-NEXT: subl $8, %esp
+; POSTRA-NEXT: movq {{[0-9]+}}(%esp), %mm0
+; POSTRA-NEXT: movl {{[0-9]+}}(%esp), %eax
+; POSTRA-NEXT: punpckhdq %mm0, %mm0 # mm0 = mm0[1,1]
+; POSTRA-NEXT: movd %mm0, %ecx
+; POSTRA-NEXT: emms
+; POSTRA-NEXT: movl %eax, (%esp)
+; POSTRA-NEXT: fildl (%esp)
+; POSTRA-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; POSTRA-NEXT: fiaddl {{[0-9]+}}(%esp)
+; POSTRA-NEXT: addl $8, %esp
+; POSTRA-NEXT: retl
%2 = bitcast <1 x i64> %0 to <2 x i32>
%3 = extractelement <2 x i32> %2, i32 0
%4 = extractelement <1 x i64> %0, i32 0
@@ -39,27 +47,35 @@ define float @PR35982_emms(<1 x i64>) nounwind {
}
define float @PR35982_femms(<1 x i64>) nounwind {
-; CHECK-LABEL: PR35982_femms:
-; CHECK: # %bb.0:
-; CHECK-NEXT: pushl %ebp
-; CHECK-NEXT: movl %esp, %ebp
-; CHECK-NEXT: andl $-8, %esp
-; CHECK-NEXT: subl $16, %esp
-; CHECK-NEXT: movl 8(%ebp), %eax
-; CHECK-NEXT: movl 12(%ebp), %ecx
-; CHECK-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; CHECK-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; CHECK-NEXT: punpckhdq %mm0, %mm0 # mm0 = mm0[1,1]
-; CHECK-NEXT: movd %mm0, %ecx
-; CHECK-NEXT: femms
-; CHECK-NEXT: movl %eax, (%esp)
-; CHECK-NEXT: fildl (%esp)
-; CHECK-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; CHECK-NEXT: fiaddl {{[0-9]+}}(%esp)
-; CHECK-NEXT: movl %ebp, %esp
-; CHECK-NEXT: popl %ebp
-; CHECK-NEXT: retl
+; NO-POSTRA-LABEL: PR35982_femms:
+; NO-POSTRA: # %bb.0:
+; NO-POSTRA-NEXT: subl $8, %esp
+; NO-POSTRA-NEXT: movl {{[0-9]+}}(%esp), %eax
+; NO-POSTRA-NEXT: movq {{[0-9]+}}(%esp), %mm0
+; NO-POSTRA-NEXT: punpckhdq %mm0, %mm0 # mm0 = mm0[1,1]
+; NO-POSTRA-NEXT: movd %mm0, %ecx
+; NO-POSTRA-NEXT: femms
+; NO-POSTRA-NEXT: movl %eax, (%esp)
+; NO-POSTRA-NEXT: fildl (%esp)
+; NO-POSTRA-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; NO-POSTRA-NEXT: fiaddl {{[0-9]+}}(%esp)
+; NO-POSTRA-NEXT: addl $8, %esp
+; NO-POSTRA-NEXT: retl
+;
+; POSTRA-LABEL: PR35982_femms:
+; POSTRA: # %bb.0:
+; POSTRA-NEXT: subl $8, %esp
+; POSTRA-NEXT: movq {{[0-9]+}}(%esp), %mm0
+; POSTRA-NEXT: movl {{[0-9]+}}(%esp), %eax
+; POSTRA-NEXT: punpckhdq %mm0, %mm0 # mm0 = mm0[1,1]
+; POSTRA-NEXT: movd %mm0, %ecx
+; POSTRA-NEXT: femms
+; POSTRA-NEXT: movl %eax, (%esp)
+; POSTRA-NEXT: fildl (%esp)
+; POSTRA-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; POSTRA-NEXT: fiaddl {{[0-9]+}}(%esp)
+; POSTRA-NEXT: addl $8, %esp
+; POSTRA-NEXT: retl
%2 = bitcast <1 x i64> %0 to <2 x i32>
%3 = extractelement <2 x i32> %2, i32 0
%4 = extractelement <1 x i64> %0, i32 0
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll
index d5115919f1dcf..442e7c4d373f0 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll
@@ -54,9 +54,9 @@ define <64 x i8> @combine_pshufb_identity(<64 x i8> %x0) {
define <64 x i8> @combine_pshufb_identity_mask(<64 x i8> %x0, i64 %m) {
; X86-LABEL: combine_pshufb_identity_mask:
; X86: # %bb.0:
+; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1
; X86-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
; X86-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
-; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1
; X86-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3
; X86-NEXT: vpshufb %zmm2, %zmm0, %zmm3 {%k1}
; X86-NEXT: vpshufb %zmm2, %zmm3, %zmm1 {%k1}
@@ -100,8 +100,8 @@ define <64 x i8> @combine_pshufb_as_pslldq(<64 x i8> %a0) {
define <64 x i8> @combine_pshufb_as_pslldq_mask(<64 x i8> %a0, i64 %m) {
; X86-LABEL: combine_pshufb_as_pslldq_mask:
; X86: # %bb.0:
-; X86-NEXT: vpslldq {{.*#+}} zmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[0,1,2,3,4,5],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[16,17,18,19,20,21],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[32,33,34,35,36,37],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[48,49,50,51,52,53]
; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1
+; X86-NEXT: vpslldq {{.*#+}} zmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[0,1,2,3,4,5],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[16,17,18,19,20,21],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[32,33,34,35,36,37],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[48,49,50,51,52,53]
; X86-NEXT: vmovdqu8 %zmm0, %zmm0 {%k1} {z}
; X86-NEXT: retl
;
@@ -126,8 +126,8 @@ define <64 x i8> @combine_pshufb_as_psrldq(<64 x i8> %a0) {
define <64 x i8> @combine_pshufb_as_psrldq_mask(<64 x i8> %a0, i64 %m) {
; X86-LABEL: combine_pshufb_as_psrldq_mask:
; X86: # %bb.0:
-; X86-NEXT: vpsrldq {{.*#+}} zmm0 = zmm0[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[31],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[47],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[63],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1
+; X86-NEXT: vpsrldq {{.*#+}} zmm0 = zmm0[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[31],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[47],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zmm0[63],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; X86-NEXT: vmovdqu8 %zmm0, %zmm0 {%k1} {z}
; X86-NEXT: retl
;
@@ -156,9 +156,9 @@ define <64 x i8> @combine_permi2q_pshufb_as_permi2d(<8 x i64> %a0, <8 x i64> %a1
define <64 x i8> @combine_permi2q_pshufb_as_permi2d_mask(<8 x i64> %a0, <8 x i64> %a1, i64 %m) {
; X86-LABEL: combine_permi2q_pshufb_as_permi2d_mask:
; X86: # %bb.0:
+; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1
; X86-NEXT: vmovdqa64 {{.*#+}} zmm2 = <7,0,u,u,5,0,u,u,u,u,12,0,u,u,14,0>
; X86-NEXT: vpermi2q %zmm0, %zmm1, %zmm2
-; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1
; X86-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm2[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,20,21,22,23,20,21,22,23,20,21,22,23,20,21,22,23,40,41,42,43,40,41,42,43,40,41,42,43,40,41,42,43,60,61,62,63,60,61,62,63,60,61,62,63,60,61,62,63]
; X86-NEXT: retl
;
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512vbmi.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512vbmi.ll
index ef7d8fc1b79c5..86c206c6b1875 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512vbmi.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512vbmi.ll
@@ -128,8 +128,8 @@ define <64 x i8> @combine_permi2q_pshufb_as_permi2d(<8 x i64> %a0, <8 x i64> %a1
define <64 x i8> @combine_permi2q_pshufb_as_permi2d_mask(<8 x i64> %a0, <8 x i64> %a1, i64 %m) {
; X86-LABEL: combine_permi2q_pshufb_as_permi2d_mask:
; X86: # %bb.0:
-; X86-NEXT: vmovdqa64 {{.*#+}} zmm2 = [56,57,58,59,56,57,58,59,56,57,58,59,56,57,58,59,44,45,46,47,44,45,46,47,44,45,46,47,44,45,46,47,96,97,98,99,96,97,98,99,96,97,98,99,96,97,98,99,116,117,118,119,116,117,118,119,116,117,118,119,116,117,118,119]
; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1
+; X86-NEXT: vmovdqa64 {{.*#+}} zmm2 = [56,57,58,59,56,57,58,59,56,57,58,59,56,57,58,59,44,45,46,47,44,45,46,47,44,45,46,47,44,45,46,47,96,97,98,99,96,97,98,99,96,97,98,99,96,97,98,99,116,117,118,119,116,117,118,119,116,117,118,119,116,117,118,119]
; X86-NEXT: vpermi2b %zmm0, %zmm1, %zmm2 {%k1} {z}
; X86-NEXT: vmovdqa64 %zmm2, %zmm0
; X86-NEXT: retl
diff --git a/llvm/test/CodeGen/X86/xmulo.ll b/llvm/test/CodeGen/X86/xmulo.ll
index 46bd004b867a8..4553c548b9260 100644
--- a/llvm/test/CodeGen/X86/xmulo.ll
+++ b/llvm/test/CodeGen/X86/xmulo.ll
@@ -1197,23 +1197,23 @@ define zeroext i1 @umulobri64(i64 %v1, i64 %v2) {
; WIN32-NEXT: testl %esi, %esi
; WIN32-NEXT: setne %dl
; WIN32-NEXT: testl %eax, %eax
-; WIN32-NEXT: setne %bl
-; WIN32-NEXT: andb %dl, %bl
+; WIN32-NEXT: setne %cl
+; WIN32-NEXT: andb %dl, %cl
; WIN32-NEXT: mull {{[0-9]+}}(%esp)
; WIN32-NEXT: movl %eax, %edi
-; WIN32-NEXT: seto %bh
+; WIN32-NEXT: seto %bl
; WIN32-NEXT: movl %esi, %eax
; WIN32-NEXT: mull %ebp
; WIN32-NEXT: movl %eax, %esi
-; WIN32-NEXT: seto %cl
-; WIN32-NEXT: orb %bh, %cl
+; WIN32-NEXT: seto %ch
+; WIN32-NEXT: orb %bl, %ch
; WIN32-NEXT: addl %edi, %esi
; WIN32-NEXT: movl %ebp, %eax
; WIN32-NEXT: mull {{[0-9]+}}(%esp)
; WIN32-NEXT: addl %esi, %edx
; WIN32-NEXT: setb %al
+; WIN32-NEXT: orb %ch, %al
; WIN32-NEXT: orb %cl, %al
-; WIN32-NEXT: orb %bl, %al
; WIN32-NEXT: subb $1, %al
; WIN32-NEXT: je LBB22_1
; WIN32-NEXT: # %bb.3: # %continue