[llvm] 96b0b9a - [X86] Enable shrink-wrapping for no-frame-pointer non-nounwind functions on platforms not using compact unwind

Fangrui Song via llvm-commits llvm-commits at lists.llvm.org
Wed Nov 4 16:51:56 PST 2020


Author: Fangrui Song
Date: 2020-11-04T16:51:48-08:00
New Revision: 96b0b9a5e3c2a038c8590b2d71fa6cc03dc96aa4

URL: https://github.com/llvm/llvm-project/commit/96b0b9a5e3c2a038c8590b2d71fa6cc03dc96aa4
DIFF: https://github.com/llvm/llvm-project/commit/96b0b9a5e3c2a038c8590b2d71fa6cc03dc96aa4.diff

LOG: [X86] Enable shrink-wrapping for no-frame-pointer non-nounwind functions on platforms not using compact unwind

The current compact unwind scheme does not work when the prologue is not at the
start of the function (the instructions before the prologue cannot be
described). Technically this is fixable, but it would require multiple compact
unwind descriptors for one function.

rL255175 chose not to perform shrink-wrapping for no-frame-pointer functions
not marked as nounwind, to work around PR25614. This is overly restrictive, as
platforms not supporting compact unwind (all non-Darwin) do not need the
workaround. This patch restricts the limitation to compact unwind platforms.

Reviewed By: qcolombet

Differential Revision: https://reviews.llvm.org/D89930

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86FrameLowering.cpp
    llvm/test/CodeGen/X86/absolute-cmp.ll
    llvm/test/CodeGen/X86/avx512-mask-op.ll
    llvm/test/CodeGen/X86/live-out-reg-info.ll
    llvm/test/CodeGen/X86/swifterror.ll
    llvm/test/CodeGen/X86/test-vs-bittest.ll
    llvm/test/CodeGen/X86/x86-shrink-wrap-unwind.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
index 05b4c94008ae..1d906ebb6584 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -28,6 +28,7 @@
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Function.h"
 #include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCObjectFileInfo.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Target/TargetOptions.h"
@@ -3256,10 +3257,14 @@ bool X86FrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
 bool X86FrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
   // If we may need to emit frameless compact unwind information, give
   // up as this is currently broken: PR25614.
-  return (MF.getFunction().hasFnAttribute(Attribute::NoUnwind) || hasFP(MF)) &&
-         // The lowering of segmented stack and HiPE only support entry blocks
-         // as prologue blocks: PR26107.
-         // This limitation may be lifted if we fix:
+  bool CompactUnwind =
+      MF.getMMI().getContext().getObjectFileInfo()->getCompactUnwindSection() !=
+      nullptr;
+  return (MF.getFunction().hasFnAttribute(Attribute::NoUnwind) || hasFP(MF) ||
+          !CompactUnwind) &&
+         // The lowering of segmented stack and HiPE only support entry
+         // blocks as prologue blocks: PR26107. This limitation may be
+         // lifted if we fix:
          // - adjustForSegmentedStacks
          // - adjustForHiPEPrologue
          MF.getFunction().getCallingConv() != CallingConv::HiPE &&

diff  --git a/llvm/test/CodeGen/X86/absolute-cmp.ll b/llvm/test/CodeGen/X86/absolute-cmp.ll
index b4f158aa7c91..99249e55dfea 100644
--- a/llvm/test/CodeGen/X86/absolute-cmp.ll
+++ b/llvm/test/CodeGen/X86/absolute-cmp.ll
@@ -13,34 +13,34 @@ declare void @f()
 define void @foo8(i64 %val) {
 ; NOPIC-LABEL: foo8:
 ; NOPIC:       # %bb.0:
-; NOPIC-NEXT:    pushq %rax # encoding: [0x50]
-; NOPIC-NEXT:    .cfi_def_cfa_offset 16
 ; NOPIC-NEXT:    cmpq $cmp8@ABS8, %rdi # encoding: [0x48,0x83,0xff,A]
 ; NOPIC-NEXT:    # fixup A - offset: 3, value: cmp8@ABS8, kind: FK_Data_1
 ; NOPIC-NEXT:    ja .LBB0_2 # encoding: [0x77,A]
 ; NOPIC-NEXT:    # fixup A - offset: 1, value: .LBB0_2-1, kind: FK_PCRel_1
 ; NOPIC-NEXT:  # %bb.1: # %t
+; NOPIC-NEXT:    pushq %rax # encoding: [0x50]
+; NOPIC-NEXT:    .cfi_def_cfa_offset 16
 ; NOPIC-NEXT:    callq f # encoding: [0xe8,A,A,A,A]
 ; NOPIC-NEXT:    # fixup A - offset: 1, value: f-4, kind: reloc_branch_4byte_pcrel
-; NOPIC-NEXT:  .LBB0_2: # %f
 ; NOPIC-NEXT:    popq %rax # encoding: [0x58]
 ; NOPIC-NEXT:    .cfi_def_cfa_offset 8
+; NOPIC-NEXT:  .LBB0_2: # %f
 ; NOPIC-NEXT:    retq # encoding: [0xc3]
 ;
 ; PIC-LABEL: foo8:
 ; PIC:       # %bb.0:
-; PIC-NEXT:    pushq %rax # encoding: [0x50]
-; PIC-NEXT:    .cfi_def_cfa_offset 16
 ; PIC-NEXT:    cmpq $cmp8@ABS8, %rdi # encoding: [0x48,0x83,0xff,A]
 ; PIC-NEXT:    # fixup A - offset: 3, value: cmp8@ABS8, kind: FK_Data_1
 ; PIC-NEXT:    ja .LBB0_2 # encoding: [0x77,A]
 ; PIC-NEXT:    # fixup A - offset: 1, value: .LBB0_2-1, kind: FK_PCRel_1
 ; PIC-NEXT:  # %bb.1: # %t
+; PIC-NEXT:    pushq %rax # encoding: [0x50]
+; PIC-NEXT:    .cfi_def_cfa_offset 16
 ; PIC-NEXT:    callq f@PLT # encoding: [0xe8,A,A,A,A]
 ; PIC-NEXT:    # fixup A - offset: 1, value: f@PLT-4, kind: FK_PCRel_4
-; PIC-NEXT:  .LBB0_2: # %f
 ; PIC-NEXT:    popq %rax # encoding: [0x58]
 ; PIC-NEXT:    .cfi_def_cfa_offset 8
+; PIC-NEXT:  .LBB0_2: # %f
 ; PIC-NEXT:    retq # encoding: [0xc3]
   %cmp = icmp ule i64 %val, ptrtoint (i8* @cmp8 to i64)
   br i1 %cmp, label %t, label %f
@@ -56,34 +56,34 @@ f:
 define void @foo32(i64 %val) {
 ; NOPIC-LABEL: foo32:
 ; NOPIC:       # %bb.0:
-; NOPIC-NEXT:    pushq %rax # encoding: [0x50]
-; NOPIC-NEXT:    .cfi_def_cfa_offset 16
 ; NOPIC-NEXT:    cmpq $cmp32, %rdi # encoding: [0x48,0x81,0xff,A,A,A,A]
 ; NOPIC-NEXT:    # fixup A - offset: 3, value: cmp32, kind: reloc_signed_4byte
 ; NOPIC-NEXT:    ja .LBB1_2 # encoding: [0x77,A]
 ; NOPIC-NEXT:    # fixup A - offset: 1, value: .LBB1_2-1, kind: FK_PCRel_1
 ; NOPIC-NEXT:  # %bb.1: # %t
+; NOPIC-NEXT:    pushq %rax # encoding: [0x50]
+; NOPIC-NEXT:    .cfi_def_cfa_offset 16
 ; NOPIC-NEXT:    callq f # encoding: [0xe8,A,A,A,A]
 ; NOPIC-NEXT:    # fixup A - offset: 1, value: f-4, kind: reloc_branch_4byte_pcrel
-; NOPIC-NEXT:  .LBB1_2: # %f
 ; NOPIC-NEXT:    popq %rax # encoding: [0x58]
 ; NOPIC-NEXT:    .cfi_def_cfa_offset 8
+; NOPIC-NEXT:  .LBB1_2: # %f
 ; NOPIC-NEXT:    retq # encoding: [0xc3]
 ;
 ; PIC-LABEL: foo32:
 ; PIC:       # %bb.0:
-; PIC-NEXT:    pushq %rax # encoding: [0x50]
-; PIC-NEXT:    .cfi_def_cfa_offset 16
 ; PIC-NEXT:    cmpq $cmp32, %rdi # encoding: [0x48,0x81,0xff,A,A,A,A]
 ; PIC-NEXT:    # fixup A - offset: 3, value: cmp32, kind: reloc_signed_4byte
 ; PIC-NEXT:    ja .LBB1_2 # encoding: [0x77,A]
 ; PIC-NEXT:    # fixup A - offset: 1, value: .LBB1_2-1, kind: FK_PCRel_1
 ; PIC-NEXT:  # %bb.1: # %t
+; PIC-NEXT:    pushq %rax # encoding: [0x50]
+; PIC-NEXT:    .cfi_def_cfa_offset 16
 ; PIC-NEXT:    callq f@PLT # encoding: [0xe8,A,A,A,A]
 ; PIC-NEXT:    # fixup A - offset: 1, value: f@PLT-4, kind: FK_PCRel_4
-; PIC-NEXT:  .LBB1_2: # %f
 ; PIC-NEXT:    popq %rax # encoding: [0x58]
 ; PIC-NEXT:    .cfi_def_cfa_offset 8
+; PIC-NEXT:  .LBB1_2: # %f
 ; PIC-NEXT:    retq # encoding: [0xc3]
   %cmp = icmp ule i64 %val, ptrtoint (i8* @cmp32 to i64)
   br i1 %cmp, label %t, label %f

diff  --git a/llvm/test/CodeGen/X86/avx512-mask-op.ll b/llvm/test/CodeGen/X86/avx512-mask-op.ll
index 67067e3fff27..8fa7fcc21e62 100644
--- a/llvm/test/CodeGen/X86/avx512-mask-op.ll
+++ b/llvm/test/CodeGen/X86/avx512-mask-op.ll
@@ -4083,94 +4083,89 @@ define i8 @test_v8i1_mul(i8 %x, i8 %y) {
 define void @ktest_signed(<16 x i32> %x, <16 x i32> %y) {
 ; KNL-LABEL: ktest_signed:
 ; KNL:       ## %bb.0:
-; KNL-NEXT:    pushq %rax
-; KNL-NEXT:    .cfi_def_cfa_offset 16
 ; KNL-NEXT:    vpord %zmm1, %zmm0, %zmm0
 ; KNL-NEXT:    vptestnmd %zmm0, %zmm0, %k0
 ; KNL-NEXT:    kmovw %k0, %eax
 ; KNL-NEXT:    testw %ax, %ax
 ; KNL-NEXT:    jle LBB66_1
 ; KNL-NEXT:  ## %bb.2: ## %bb.2
-; KNL-NEXT:    popq %rax
 ; KNL-NEXT:    vzeroupper
 ; KNL-NEXT:    retq
 ; KNL-NEXT:  LBB66_1: ## %bb.1
+; KNL-NEXT:    pushq %rax
+; KNL-NEXT:    .cfi_def_cfa_offset 16
 ; KNL-NEXT:    vzeroupper
 ; KNL-NEXT:    callq _foo
-; KNL-NEXT:    popq %rax
+; KNL-NEXT:    addq $8, %rsp
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: ktest_signed:
 ; SKX:       ## %bb.0:
-; SKX-NEXT:    pushq %rax
-; SKX-NEXT:    .cfi_def_cfa_offset 16
 ; SKX-NEXT:    vpord %zmm1, %zmm0, %zmm0
 ; SKX-NEXT:    vptestnmd %zmm0, %zmm0, %k0
 ; SKX-NEXT:    kmovd %k0, %eax
 ; SKX-NEXT:    testw %ax, %ax
 ; SKX-NEXT:    jle LBB66_1
 ; SKX-NEXT:  ## %bb.2: ## %bb.2
-; SKX-NEXT:    popq %rax
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    retq
 ; SKX-NEXT:  LBB66_1: ## %bb.1
+; SKX-NEXT:    pushq %rax
+; SKX-NEXT:    .cfi_def_cfa_offset 16
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    callq _foo
-; SKX-NEXT:    popq %rax
+; SKX-NEXT:    addq $8, %rsp
 ; SKX-NEXT:    retq
 ;
 ; AVX512BW-LABEL: ktest_signed:
 ; AVX512BW:       ## %bb.0:
-; AVX512BW-NEXT:    pushq %rax
-; AVX512BW-NEXT:    .cfi_def_cfa_offset 16
 ; AVX512BW-NEXT:    vpord %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vptestnmd %zmm0, %zmm0, %k0
 ; AVX512BW-NEXT:    kmovd %k0, %eax
 ; AVX512BW-NEXT:    testw %ax, %ax
 ; AVX512BW-NEXT:    jle LBB66_1
 ; AVX512BW-NEXT:  ## %bb.2: ## %bb.2
-; AVX512BW-NEXT:    popq %rax
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ; AVX512BW-NEXT:  LBB66_1: ## %bb.1
+; AVX512BW-NEXT:    pushq %rax
+; AVX512BW-NEXT:    .cfi_def_cfa_offset 16
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    callq _foo
-; AVX512BW-NEXT:    popq %rax
+; AVX512BW-NEXT:    addq $8, %rsp
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512DQ-LABEL: ktest_signed:
 ; AVX512DQ:       ## %bb.0:
-; AVX512DQ-NEXT:    pushq %rax
-; AVX512DQ-NEXT:    .cfi_def_cfa_offset 16
 ; AVX512DQ-NEXT:    vpord %zmm1, %zmm0, %zmm0
 ; AVX512DQ-NEXT:    vptestnmd %zmm0, %zmm0, %k0
 ; AVX512DQ-NEXT:    kmovw %k0, %eax
 ; AVX512DQ-NEXT:    testw %ax, %ax
 ; AVX512DQ-NEXT:    jle LBB66_1
 ; AVX512DQ-NEXT:  ## %bb.2: ## %bb.2
-; AVX512DQ-NEXT:    popq %rax
 ; AVX512DQ-NEXT:    vzeroupper
 ; AVX512DQ-NEXT:    retq
 ; AVX512DQ-NEXT:  LBB66_1: ## %bb.1
+; AVX512DQ-NEXT:    pushq %rax
+; AVX512DQ-NEXT:    .cfi_def_cfa_offset 16
 ; AVX512DQ-NEXT:    vzeroupper
 ; AVX512DQ-NEXT:    callq _foo
-; AVX512DQ-NEXT:    popq %rax
+; AVX512DQ-NEXT:    addq $8, %rsp
 ; AVX512DQ-NEXT:    retq
 ;
 ; X86-LABEL: ktest_signed:
 ; X86:       ## %bb.0:
-; X86-NEXT:    subl $12, %esp
-; X86-NEXT:    .cfi_def_cfa_offset 16
 ; X86-NEXT:    vpord %zmm1, %zmm0, %zmm0
 ; X86-NEXT:    vptestnmd %zmm0, %zmm0, %k0
 ; X86-NEXT:    kmovd %k0, %eax
 ; X86-NEXT:    testw %ax, %ax
 ; X86-NEXT:    jle LBB66_1
 ; X86-NEXT:  ## %bb.2: ## %bb.2
-; X86-NEXT:    addl $12, %esp
 ; X86-NEXT:    vzeroupper
 ; X86-NEXT:    retl
 ; X86-NEXT:  LBB66_1: ## %bb.1
+; X86-NEXT:    subl $12, %esp
+; X86-NEXT:    .cfi_def_cfa_offset 16
 ; X86-NEXT:    vzeroupper
 ; X86-NEXT:    calll _foo
 ; X86-NEXT:    addl $12, %esp
@@ -4193,33 +4188,33 @@ declare void @foo()
 define void @ktest_allones(<16 x i32> %x, <16 x i32> %y) {
 ; CHECK-LABEL: ktest_allones:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    pushq %rax
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    vpord %zmm1, %zmm0, %zmm0
 ; CHECK-NEXT:    vptestnmd %zmm0, %zmm0, %k0
 ; CHECK-NEXT:    kortestw %k0, %k0
 ; CHECK-NEXT:    jb LBB67_2
 ; CHECK-NEXT:  ## %bb.1: ## %bb.1
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    vzeroupper
 ; CHECK-NEXT:    callq _foo
+; CHECK-NEXT:    addq $8, %rsp
 ; CHECK-NEXT:  LBB67_2: ## %bb.2
-; CHECK-NEXT:    popq %rax
 ; CHECK-NEXT:    vzeroupper
 ; CHECK-NEXT:    retq
 ;
 ; X86-LABEL: ktest_allones:
 ; X86:       ## %bb.0:
-; X86-NEXT:    subl $12, %esp
-; X86-NEXT:    .cfi_def_cfa_offset 16
 ; X86-NEXT:    vpord %zmm1, %zmm0, %zmm0
 ; X86-NEXT:    vptestnmd %zmm0, %zmm0, %k0
 ; X86-NEXT:    kortestw %k0, %k0
 ; X86-NEXT:    jb LBB67_2
 ; X86-NEXT:  ## %bb.1: ## %bb.1
+; X86-NEXT:    subl $12, %esp
+; X86-NEXT:    .cfi_def_cfa_offset 16
 ; X86-NEXT:    vzeroupper
 ; X86-NEXT:    calll _foo
-; X86-NEXT:  LBB67_2: ## %bb.2
 ; X86-NEXT:    addl $12, %esp
+; X86-NEXT:  LBB67_2: ## %bb.2
 ; X86-NEXT:    vzeroupper
 ; X86-NEXT:    retl
   %a = icmp eq <16 x i32> %x, zeroinitializer
@@ -4411,8 +4406,6 @@ declare void @llvm.masked.store.v16i32.p0v16i32(<16 x i32>, <16 x i32>*, i32, <1
 define void @ktest_3(<8 x i32> %w, <8 x i32> %x, <8 x i32> %y, <8 x i32> %z) {
 ; KNL-LABEL: ktest_3:
 ; KNL:       ## %bb.0:
-; KNL-NEXT:    pushq %rax
-; KNL-NEXT:    .cfi_def_cfa_offset 16
 ; KNL-NEXT:    ## kill: def $ymm3 killed $ymm3 def $zmm3
 ; KNL-NEXT:    ## kill: def $ymm2 killed $ymm2 def $zmm2
 ; KNL-NEXT:    ## kill: def $ymm1 killed $ymm1 def $zmm1
@@ -4428,19 +4421,18 @@ define void @ktest_3(<8 x i32> %w, <8 x i32> %x, <8 x i32> %y, <8 x i32> %z) {
 ; KNL-NEXT:    testb %al, %al
 ; KNL-NEXT:    je LBB74_1
 ; KNL-NEXT:  ## %bb.2: ## %exit
-; KNL-NEXT:    popq %rax
 ; KNL-NEXT:    vzeroupper
 ; KNL-NEXT:    retq
 ; KNL-NEXT:  LBB74_1: ## %bar
+; KNL-NEXT:    pushq %rax
+; KNL-NEXT:    .cfi_def_cfa_offset 16
 ; KNL-NEXT:    vzeroupper
 ; KNL-NEXT:    callq _foo
-; KNL-NEXT:    popq %rax
+; KNL-NEXT:    addq $8, %rsp
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: ktest_3:
 ; SKX:       ## %bb.0:
-; SKX-NEXT:    pushq %rax
-; SKX-NEXT:    .cfi_def_cfa_offset 16
 ; SKX-NEXT:    vptestnmd %ymm0, %ymm0, %k0
 ; SKX-NEXT:    vptestnmd %ymm1, %ymm1, %k1
 ; SKX-NEXT:    korb %k1, %k0, %k0
@@ -4450,19 +4442,18 @@ define void @ktest_3(<8 x i32> %w, <8 x i32> %x, <8 x i32> %y, <8 x i32> %z) {
 ; SKX-NEXT:    ktestb %k1, %k0
 ; SKX-NEXT:    je LBB74_1
 ; SKX-NEXT:  ## %bb.2: ## %exit
-; SKX-NEXT:    popq %rax
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    retq
 ; SKX-NEXT:  LBB74_1: ## %bar
+; SKX-NEXT:    pushq %rax
+; SKX-NEXT:    .cfi_def_cfa_offset 16
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    callq _foo
-; SKX-NEXT:    popq %rax
+; SKX-NEXT:    addq $8, %rsp
 ; SKX-NEXT:    retq
 ;
 ; AVX512BW-LABEL: ktest_3:
 ; AVX512BW:       ## %bb.0:
-; AVX512BW-NEXT:    pushq %rax
-; AVX512BW-NEXT:    .cfi_def_cfa_offset 16
 ; AVX512BW-NEXT:    ## kill: def $ymm3 killed $ymm3 def $zmm3
 ; AVX512BW-NEXT:    ## kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512BW-NEXT:    ## kill: def $ymm1 killed $ymm1 def $zmm1
@@ -4478,19 +4469,18 @@ define void @ktest_3(<8 x i32> %w, <8 x i32> %x, <8 x i32> %y, <8 x i32> %z) {
 ; AVX512BW-NEXT:    testb %al, %al
 ; AVX512BW-NEXT:    je LBB74_1
 ; AVX512BW-NEXT:  ## %bb.2: ## %exit
-; AVX512BW-NEXT:    popq %rax
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ; AVX512BW-NEXT:  LBB74_1: ## %bar
+; AVX512BW-NEXT:    pushq %rax
+; AVX512BW-NEXT:    .cfi_def_cfa_offset 16
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    callq _foo
-; AVX512BW-NEXT:    popq %rax
+; AVX512BW-NEXT:    addq $8, %rsp
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512DQ-LABEL: ktest_3:
 ; AVX512DQ:       ## %bb.0:
-; AVX512DQ-NEXT:    pushq %rax
-; AVX512DQ-NEXT:    .cfi_def_cfa_offset 16
 ; AVX512DQ-NEXT:    ## kill: def $ymm3 killed $ymm3 def $zmm3
 ; AVX512DQ-NEXT:    ## kill: def $ymm2 killed $ymm2 def $zmm2
 ; AVX512DQ-NEXT:    ## kill: def $ymm1 killed $ymm1 def $zmm1
@@ -4504,19 +4494,18 @@ define void @ktest_3(<8 x i32> %w, <8 x i32> %x, <8 x i32> %y, <8 x i32> %z) {
 ; AVX512DQ-NEXT:    ktestb %k1, %k0
 ; AVX512DQ-NEXT:    je LBB74_1
 ; AVX512DQ-NEXT:  ## %bb.2: ## %exit
-; AVX512DQ-NEXT:    popq %rax
 ; AVX512DQ-NEXT:    vzeroupper
 ; AVX512DQ-NEXT:    retq
 ; AVX512DQ-NEXT:  LBB74_1: ## %bar
+; AVX512DQ-NEXT:    pushq %rax
+; AVX512DQ-NEXT:    .cfi_def_cfa_offset 16
 ; AVX512DQ-NEXT:    vzeroupper
 ; AVX512DQ-NEXT:    callq _foo
-; AVX512DQ-NEXT:    popq %rax
+; AVX512DQ-NEXT:    addq $8, %rsp
 ; AVX512DQ-NEXT:    retq
 ;
 ; X86-LABEL: ktest_3:
 ; X86:       ## %bb.0:
-; X86-NEXT:    subl $12, %esp
-; X86-NEXT:    .cfi_def_cfa_offset 16
 ; X86-NEXT:    vptestnmd %ymm0, %ymm0, %k0
 ; X86-NEXT:    vptestnmd %ymm1, %ymm1, %k1
 ; X86-NEXT:    korb %k1, %k0, %k0
@@ -4526,10 +4515,11 @@ define void @ktest_3(<8 x i32> %w, <8 x i32> %x, <8 x i32> %y, <8 x i32> %z) {
 ; X86-NEXT:    ktestb %k1, %k0
 ; X86-NEXT:    je LBB74_1
 ; X86-NEXT:  ## %bb.2: ## %exit
-; X86-NEXT:    addl $12, %esp
 ; X86-NEXT:    vzeroupper
 ; X86-NEXT:    retl
 ; X86-NEXT:  LBB74_1: ## %bar
+; X86-NEXT:    subl $12, %esp
+; X86-NEXT:    .cfi_def_cfa_offset 16
 ; X86-NEXT:    vzeroupper
 ; X86-NEXT:    calll _foo
 ; X86-NEXT:    addl $12, %esp
@@ -4556,8 +4546,6 @@ exit:
 define void @ktest_4(<8 x i64> %w, <8 x i64> %x, <8 x i64> %y, <8 x i64> %z) {
 ; KNL-LABEL: ktest_4:
 ; KNL:       ## %bb.0:
-; KNL-NEXT:    pushq %rax
-; KNL-NEXT:    .cfi_def_cfa_offset 16
 ; KNL-NEXT:    vptestnmq %zmm0, %zmm0, %k0
 ; KNL-NEXT:    vptestnmq %zmm1, %zmm1, %k1
 ; KNL-NEXT:    vptestnmq %zmm2, %zmm2, %k2
@@ -4569,19 +4557,18 @@ define void @ktest_4(<8 x i64> %w, <8 x i64> %x, <8 x i64> %y, <8 x i64> %z) {
 ; KNL-NEXT:    testb %al, %al
 ; KNL-NEXT:    je LBB75_1
 ; KNL-NEXT:  ## %bb.2: ## %exit
-; KNL-NEXT:    popq %rax
 ; KNL-NEXT:    vzeroupper
 ; KNL-NEXT:    retq
 ; KNL-NEXT:  LBB75_1: ## %bar
+; KNL-NEXT:    pushq %rax
+; KNL-NEXT:    .cfi_def_cfa_offset 16
 ; KNL-NEXT:    vzeroupper
 ; KNL-NEXT:    callq _foo
-; KNL-NEXT:    popq %rax
+; KNL-NEXT:    addq $8, %rsp
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: ktest_4:
 ; SKX:       ## %bb.0:
-; SKX-NEXT:    pushq %rax
-; SKX-NEXT:    .cfi_def_cfa_offset 16
 ; SKX-NEXT:    vptestnmq %zmm0, %zmm0, %k0
 ; SKX-NEXT:    vptestnmq %zmm1, %zmm1, %k1
 ; SKX-NEXT:    korb %k1, %k0, %k0
@@ -4591,19 +4578,18 @@ define void @ktest_4(<8 x i64> %w, <8 x i64> %x, <8 x i64> %y, <8 x i64> %z) {
 ; SKX-NEXT:    ktestb %k1, %k0
 ; SKX-NEXT:    je LBB75_1
 ; SKX-NEXT:  ## %bb.2: ## %exit
-; SKX-NEXT:    popq %rax
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    retq
 ; SKX-NEXT:  LBB75_1: ## %bar
+; SKX-NEXT:    pushq %rax
+; SKX-NEXT:    .cfi_def_cfa_offset 16
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    callq _foo
-; SKX-NEXT:    popq %rax
+; SKX-NEXT:    addq $8, %rsp
 ; SKX-NEXT:    retq
 ;
 ; AVX512BW-LABEL: ktest_4:
 ; AVX512BW:       ## %bb.0:
-; AVX512BW-NEXT:    pushq %rax
-; AVX512BW-NEXT:    .cfi_def_cfa_offset 16
 ; AVX512BW-NEXT:    vptestnmq %zmm0, %zmm0, %k0
 ; AVX512BW-NEXT:    vptestnmq %zmm1, %zmm1, %k1
 ; AVX512BW-NEXT:    vptestnmq %zmm2, %zmm2, %k2
@@ -4615,19 +4601,18 @@ define void @ktest_4(<8 x i64> %w, <8 x i64> %x, <8 x i64> %y, <8 x i64> %z) {
 ; AVX512BW-NEXT:    testb %al, %al
 ; AVX512BW-NEXT:    je LBB75_1
 ; AVX512BW-NEXT:  ## %bb.2: ## %exit
-; AVX512BW-NEXT:    popq %rax
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ; AVX512BW-NEXT:  LBB75_1: ## %bar
+; AVX512BW-NEXT:    pushq %rax
+; AVX512BW-NEXT:    .cfi_def_cfa_offset 16
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    callq _foo
-; AVX512BW-NEXT:    popq %rax
+; AVX512BW-NEXT:    addq $8, %rsp
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512DQ-LABEL: ktest_4:
 ; AVX512DQ:       ## %bb.0:
-; AVX512DQ-NEXT:    pushq %rax
-; AVX512DQ-NEXT:    .cfi_def_cfa_offset 16
 ; AVX512DQ-NEXT:    vptestnmq %zmm0, %zmm0, %k0
 ; AVX512DQ-NEXT:    vptestnmq %zmm1, %zmm1, %k1
 ; AVX512DQ-NEXT:    korb %k1, %k0, %k0
@@ -4637,19 +4622,18 @@ define void @ktest_4(<8 x i64> %w, <8 x i64> %x, <8 x i64> %y, <8 x i64> %z) {
 ; AVX512DQ-NEXT:    ktestb %k1, %k0
 ; AVX512DQ-NEXT:    je LBB75_1
 ; AVX512DQ-NEXT:  ## %bb.2: ## %exit
-; AVX512DQ-NEXT:    popq %rax
 ; AVX512DQ-NEXT:    vzeroupper
 ; AVX512DQ-NEXT:    retq
 ; AVX512DQ-NEXT:  LBB75_1: ## %bar
+; AVX512DQ-NEXT:    pushq %rax
+; AVX512DQ-NEXT:    .cfi_def_cfa_offset 16
 ; AVX512DQ-NEXT:    vzeroupper
 ; AVX512DQ-NEXT:    callq _foo
-; AVX512DQ-NEXT:    popq %rax
+; AVX512DQ-NEXT:    addq $8, %rsp
 ; AVX512DQ-NEXT:    retq
 ;
 ; X86-LABEL: ktest_4:
 ; X86:       ## %bb.0:
-; X86-NEXT:    subl $12, %esp
-; X86-NEXT:    .cfi_def_cfa_offset 16
 ; X86-NEXT:    vptestnmq %zmm0, %zmm0, %k0
 ; X86-NEXT:    vptestnmq %zmm1, %zmm1, %k1
 ; X86-NEXT:    korb %k1, %k0, %k0
@@ -4659,10 +4643,11 @@ define void @ktest_4(<8 x i64> %w, <8 x i64> %x, <8 x i64> %y, <8 x i64> %z) {
 ; X86-NEXT:    ktestb %k1, %k0
 ; X86-NEXT:    je LBB75_1
 ; X86-NEXT:  ## %bb.2: ## %exit
-; X86-NEXT:    addl $12, %esp
 ; X86-NEXT:    vzeroupper
 ; X86-NEXT:    retl
 ; X86-NEXT:  LBB75_1: ## %bar
+; X86-NEXT:    subl $12, %esp
+; X86-NEXT:    .cfi_def_cfa_offset 16
 ; X86-NEXT:    vzeroupper
 ; X86-NEXT:    calll _foo
 ; X86-NEXT:    addl $12, %esp
@@ -4689,8 +4674,6 @@ exit:
 define void @ktest_5(<16 x i32> %w, <16 x i32> %x, <16 x i32> %y, <16 x i32> %z) {
 ; KNL-LABEL: ktest_5:
 ; KNL:       ## %bb.0:
-; KNL-NEXT:    pushq %rax
-; KNL-NEXT:    .cfi_def_cfa_offset 16
 ; KNL-NEXT:    vptestnmd %zmm0, %zmm0, %k0
 ; KNL-NEXT:    vptestnmd %zmm1, %zmm1, %k1
 ; KNL-NEXT:    korw %k1, %k0, %k0
@@ -4701,19 +4684,18 @@ define void @ktest_5(<16 x i32> %w, <16 x i32> %x, <16 x i32> %y, <16 x i32> %z)
 ; KNL-NEXT:    kortestw %k0, %k0
 ; KNL-NEXT:    je LBB76_1
 ; KNL-NEXT:  ## %bb.2: ## %exit
-; KNL-NEXT:    popq %rax
 ; KNL-NEXT:    vzeroupper
 ; KNL-NEXT:    retq
 ; KNL-NEXT:  LBB76_1: ## %bar
+; KNL-NEXT:    pushq %rax
+; KNL-NEXT:    .cfi_def_cfa_offset 16
 ; KNL-NEXT:    vzeroupper
 ; KNL-NEXT:    callq _foo
-; KNL-NEXT:    popq %rax
+; KNL-NEXT:    addq $8, %rsp
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: ktest_5:
 ; SKX:       ## %bb.0:
-; SKX-NEXT:    pushq %rax
-; SKX-NEXT:    .cfi_def_cfa_offset 16
 ; SKX-NEXT:    vptestnmd %zmm0, %zmm0, %k0
 ; SKX-NEXT:    vptestnmd %zmm1, %zmm1, %k1
 ; SKX-NEXT:    korw %k1, %k0, %k0
@@ -4723,19 +4705,18 @@ define void @ktest_5(<16 x i32> %w, <16 x i32> %x, <16 x i32> %y, <16 x i32> %z)
 ; SKX-NEXT:    ktestw %k1, %k0
 ; SKX-NEXT:    je LBB76_1
 ; SKX-NEXT:  ## %bb.2: ## %exit
-; SKX-NEXT:    popq %rax
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    retq
 ; SKX-NEXT:  LBB76_1: ## %bar
+; SKX-NEXT:    pushq %rax
+; SKX-NEXT:    .cfi_def_cfa_offset 16
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    callq _foo
-; SKX-NEXT:    popq %rax
+; SKX-NEXT:    addq $8, %rsp
 ; SKX-NEXT:    retq
 ;
 ; AVX512BW-LABEL: ktest_5:
 ; AVX512BW:       ## %bb.0:
-; AVX512BW-NEXT:    pushq %rax
-; AVX512BW-NEXT:    .cfi_def_cfa_offset 16
 ; AVX512BW-NEXT:    vptestnmd %zmm0, %zmm0, %k0
 ; AVX512BW-NEXT:    vptestnmd %zmm1, %zmm1, %k1
 ; AVX512BW-NEXT:    korw %k1, %k0, %k0
@@ -4746,19 +4727,18 @@ define void @ktest_5(<16 x i32> %w, <16 x i32> %x, <16 x i32> %y, <16 x i32> %z)
 ; AVX512BW-NEXT:    kortestw %k0, %k0
 ; AVX512BW-NEXT:    je LBB76_1
 ; AVX512BW-NEXT:  ## %bb.2: ## %exit
-; AVX512BW-NEXT:    popq %rax
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ; AVX512BW-NEXT:  LBB76_1: ## %bar
+; AVX512BW-NEXT:    pushq %rax
+; AVX512BW-NEXT:    .cfi_def_cfa_offset 16
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    callq _foo
-; AVX512BW-NEXT:    popq %rax
+; AVX512BW-NEXT:    addq $8, %rsp
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512DQ-LABEL: ktest_5:
 ; AVX512DQ:       ## %bb.0:
-; AVX512DQ-NEXT:    pushq %rax
-; AVX512DQ-NEXT:    .cfi_def_cfa_offset 16
 ; AVX512DQ-NEXT:    vptestnmd %zmm0, %zmm0, %k0
 ; AVX512DQ-NEXT:    vptestnmd %zmm1, %zmm1, %k1
 ; AVX512DQ-NEXT:    korw %k1, %k0, %k0
@@ -4768,19 +4748,18 @@ define void @ktest_5(<16 x i32> %w, <16 x i32> %x, <16 x i32> %y, <16 x i32> %z)
 ; AVX512DQ-NEXT:    ktestw %k1, %k0
 ; AVX512DQ-NEXT:    je LBB76_1
 ; AVX512DQ-NEXT:  ## %bb.2: ## %exit
-; AVX512DQ-NEXT:    popq %rax
 ; AVX512DQ-NEXT:    vzeroupper
 ; AVX512DQ-NEXT:    retq
 ; AVX512DQ-NEXT:  LBB76_1: ## %bar
+; AVX512DQ-NEXT:    pushq %rax
+; AVX512DQ-NEXT:    .cfi_def_cfa_offset 16
 ; AVX512DQ-NEXT:    vzeroupper
 ; AVX512DQ-NEXT:    callq _foo
-; AVX512DQ-NEXT:    popq %rax
+; AVX512DQ-NEXT:    addq $8, %rsp
 ; AVX512DQ-NEXT:    retq
 ;
 ; X86-LABEL: ktest_5:
 ; X86:       ## %bb.0:
-; X86-NEXT:    subl $12, %esp
-; X86-NEXT:    .cfi_def_cfa_offset 16
 ; X86-NEXT:    vptestnmd %zmm0, %zmm0, %k0
 ; X86-NEXT:    vptestnmd %zmm1, %zmm1, %k1
 ; X86-NEXT:    korw %k1, %k0, %k0
@@ -4790,10 +4769,11 @@ define void @ktest_5(<16 x i32> %w, <16 x i32> %x, <16 x i32> %y, <16 x i32> %z)
 ; X86-NEXT:    ktestw %k1, %k0
 ; X86-NEXT:    je LBB76_1
 ; X86-NEXT:  ## %bb.2: ## %exit
-; X86-NEXT:    addl $12, %esp
 ; X86-NEXT:    vzeroupper
 ; X86-NEXT:    retl
 ; X86-NEXT:  LBB76_1: ## %bar
+; X86-NEXT:    subl $12, %esp
+; X86-NEXT:    .cfi_def_cfa_offset 16
 ; X86-NEXT:    vzeroupper
 ; X86-NEXT:    calll _foo
 ; X86-NEXT:    addl $12, %esp
@@ -4820,8 +4800,6 @@ exit:
 define void @ktest_6(<32 x i16> %w, <32 x i16> %x, <32 x i16> %y, <32 x i16> %z) {
 ; KNL-LABEL: ktest_6:
 ; KNL:       ## %bb.0:
-; KNL-NEXT:    pushq %rax
-; KNL-NEXT:    .cfi_def_cfa_offset 16
 ; KNL-NEXT:    vextracti64x4 $1, %zmm0, %ymm4
 ; KNL-NEXT:    vpxor %xmm5, %xmm5, %xmm5
 ; KNL-NEXT:    vpcmpeqw %ymm5, %ymm4, %ymm4
@@ -4852,19 +4830,18 @@ define void @ktest_6(<32 x i16> %w, <32 x i16> %x, <32 x i16> %y, <32 x i16> %z)
 ; KNL-NEXT:    orl %eax, %ecx
 ; KNL-NEXT:    je LBB77_1
 ; KNL-NEXT:  ## %bb.2: ## %exit
-; KNL-NEXT:    popq %rax
 ; KNL-NEXT:    vzeroupper
 ; KNL-NEXT:    retq
 ; KNL-NEXT:  LBB77_1: ## %bar
+; KNL-NEXT:    pushq %rax
+; KNL-NEXT:    .cfi_def_cfa_offset 16
 ; KNL-NEXT:    vzeroupper
 ; KNL-NEXT:    callq _foo
-; KNL-NEXT:    popq %rax
+; KNL-NEXT:    addq $8, %rsp
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: ktest_6:
 ; SKX:       ## %bb.0:
-; SKX-NEXT:    pushq %rax
-; SKX-NEXT:    .cfi_def_cfa_offset 16
 ; SKX-NEXT:    vptestnmw %zmm0, %zmm0, %k0
 ; SKX-NEXT:    vptestnmw %zmm1, %zmm1, %k1
 ; SKX-NEXT:    kord %k1, %k0, %k0
@@ -4874,19 +4851,18 @@ define void @ktest_6(<32 x i16> %w, <32 x i16> %x, <32 x i16> %y, <32 x i16> %z)
 ; SKX-NEXT:    ktestd %k1, %k0
 ; SKX-NEXT:    je LBB77_1
 ; SKX-NEXT:  ## %bb.2: ## %exit
-; SKX-NEXT:    popq %rax
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    retq
 ; SKX-NEXT:  LBB77_1: ## %bar
+; SKX-NEXT:    pushq %rax
+; SKX-NEXT:    .cfi_def_cfa_offset 16
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    callq _foo
-; SKX-NEXT:    popq %rax
+; SKX-NEXT:    addq $8, %rsp
 ; SKX-NEXT:    retq
 ;
 ; AVX512BW-LABEL: ktest_6:
 ; AVX512BW:       ## %bb.0:
-; AVX512BW-NEXT:    pushq %rax
-; AVX512BW-NEXT:    .cfi_def_cfa_offset 16
 ; AVX512BW-NEXT:    vptestnmw %zmm0, %zmm0, %k0
 ; AVX512BW-NEXT:    vptestnmw %zmm1, %zmm1, %k1
 ; AVX512BW-NEXT:    kord %k1, %k0, %k0
@@ -4896,19 +4872,18 @@ define void @ktest_6(<32 x i16> %w, <32 x i16> %x, <32 x i16> %y, <32 x i16> %z)
 ; AVX512BW-NEXT:    ktestd %k1, %k0
 ; AVX512BW-NEXT:    je LBB77_1
 ; AVX512BW-NEXT:  ## %bb.2: ## %exit
-; AVX512BW-NEXT:    popq %rax
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ; AVX512BW-NEXT:  LBB77_1: ## %bar
+; AVX512BW-NEXT:    pushq %rax
+; AVX512BW-NEXT:    .cfi_def_cfa_offset 16
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    callq _foo
-; AVX512BW-NEXT:    popq %rax
+; AVX512BW-NEXT:    addq $8, %rsp
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512DQ-LABEL: ktest_6:
 ; AVX512DQ:       ## %bb.0:
-; AVX512DQ-NEXT:    pushq %rax
-; AVX512DQ-NEXT:    .cfi_def_cfa_offset 16
 ; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm4
 ; AVX512DQ-NEXT:    vpxor %xmm5, %xmm5, %xmm5
 ; AVX512DQ-NEXT:    vpcmpeqw %ymm5, %ymm4, %ymm4
@@ -4939,19 +4914,18 @@ define void @ktest_6(<32 x i16> %w, <32 x i16> %x, <32 x i16> %y, <32 x i16> %z)
 ; AVX512DQ-NEXT:    orl %eax, %ecx
 ; AVX512DQ-NEXT:    je LBB77_1
 ; AVX512DQ-NEXT:  ## %bb.2: ## %exit
-; AVX512DQ-NEXT:    popq %rax
 ; AVX512DQ-NEXT:    vzeroupper
 ; AVX512DQ-NEXT:    retq
 ; AVX512DQ-NEXT:  LBB77_1: ## %bar
+; AVX512DQ-NEXT:    pushq %rax
+; AVX512DQ-NEXT:    .cfi_def_cfa_offset 16
 ; AVX512DQ-NEXT:    vzeroupper
 ; AVX512DQ-NEXT:    callq _foo
-; AVX512DQ-NEXT:    popq %rax
+; AVX512DQ-NEXT:    addq $8, %rsp
 ; AVX512DQ-NEXT:    retq
 ;
 ; X86-LABEL: ktest_6:
 ; X86:       ## %bb.0:
-; X86-NEXT:    subl $12, %esp
-; X86-NEXT:    .cfi_def_cfa_offset 16
 ; X86-NEXT:    vptestnmw %zmm0, %zmm0, %k0
 ; X86-NEXT:    vptestnmw %zmm1, %zmm1, %k1
 ; X86-NEXT:    kord %k1, %k0, %k0
@@ -4961,10 +4935,11 @@ define void @ktest_6(<32 x i16> %w, <32 x i16> %x, <32 x i16> %y, <32 x i16> %z)
 ; X86-NEXT:    ktestd %k1, %k0
 ; X86-NEXT:    je LBB77_1
 ; X86-NEXT:  ## %bb.2: ## %exit
-; X86-NEXT:    addl $12, %esp
 ; X86-NEXT:    vzeroupper
 ; X86-NEXT:    retl
 ; X86-NEXT:  LBB77_1: ## %bar
+; X86-NEXT:    subl $12, %esp
+; X86-NEXT:    .cfi_def_cfa_offset 16
 ; X86-NEXT:    vzeroupper
 ; X86-NEXT:    calll _foo
 ; X86-NEXT:    addl $12, %esp
@@ -4991,8 +4966,6 @@ exit:
 define void @ktest_7(<64 x i8> %w, <64 x i8> %x, <64 x i8> %y, <64 x i8> %z) {
 ; KNL-LABEL: ktest_7:
 ; KNL:       ## %bb.0:
-; KNL-NEXT:    pushq %rax
-; KNL-NEXT:    .cfi_def_cfa_offset 16
 ; KNL-NEXT:    vextracti64x4 $1, %zmm0, %ymm4
 ; KNL-NEXT:    vpxor %xmm5, %xmm5, %xmm5
 ; KNL-NEXT:    vpcmpeqb %ymm5, %ymm4, %ymm4
@@ -5019,19 +4992,18 @@ define void @ktest_7(<64 x i8> %w, <64 x i8> %x, <64 x i8> %y, <64 x i8> %z) {
 ; KNL-NEXT:    orq %rax, %rcx
 ; KNL-NEXT:    je LBB78_1
 ; KNL-NEXT:  ## %bb.2: ## %exit
-; KNL-NEXT:    popq %rax
 ; KNL-NEXT:    vzeroupper
 ; KNL-NEXT:    retq
 ; KNL-NEXT:  LBB78_1: ## %bar
+; KNL-NEXT:    pushq %rax
+; KNL-NEXT:    .cfi_def_cfa_offset 16
 ; KNL-NEXT:    vzeroupper
 ; KNL-NEXT:    callq _foo
-; KNL-NEXT:    popq %rax
+; KNL-NEXT:    addq $8, %rsp
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: ktest_7:
 ; SKX:       ## %bb.0:
-; SKX-NEXT:    pushq %rax
-; SKX-NEXT:    .cfi_def_cfa_offset 16
 ; SKX-NEXT:    vptestnmb %zmm0, %zmm0, %k0
 ; SKX-NEXT:    vptestnmb %zmm1, %zmm1, %k1
 ; SKX-NEXT:    korq %k1, %k0, %k0
@@ -5041,19 +5013,18 @@ define void @ktest_7(<64 x i8> %w, <64 x i8> %x, <64 x i8> %y, <64 x i8> %z) {
 ; SKX-NEXT:    ktestq %k1, %k0
 ; SKX-NEXT:    je LBB78_1
 ; SKX-NEXT:  ## %bb.2: ## %exit
-; SKX-NEXT:    popq %rax
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    retq
 ; SKX-NEXT:  LBB78_1: ## %bar
+; SKX-NEXT:    pushq %rax
+; SKX-NEXT:    .cfi_def_cfa_offset 16
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    callq _foo
-; SKX-NEXT:    popq %rax
+; SKX-NEXT:    addq $8, %rsp
 ; SKX-NEXT:    retq
 ;
 ; AVX512BW-LABEL: ktest_7:
 ; AVX512BW:       ## %bb.0:
-; AVX512BW-NEXT:    pushq %rax
-; AVX512BW-NEXT:    .cfi_def_cfa_offset 16
 ; AVX512BW-NEXT:    vptestnmb %zmm0, %zmm0, %k0
 ; AVX512BW-NEXT:    vptestnmb %zmm1, %zmm1, %k1
 ; AVX512BW-NEXT:    korq %k1, %k0, %k0
@@ -5063,19 +5034,18 @@ define void @ktest_7(<64 x i8> %w, <64 x i8> %x, <64 x i8> %y, <64 x i8> %z) {
 ; AVX512BW-NEXT:    ktestq %k1, %k0
 ; AVX512BW-NEXT:    je LBB78_1
 ; AVX512BW-NEXT:  ## %bb.2: ## %exit
-; AVX512BW-NEXT:    popq %rax
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ; AVX512BW-NEXT:  LBB78_1: ## %bar
+; AVX512BW-NEXT:    pushq %rax
+; AVX512BW-NEXT:    .cfi_def_cfa_offset 16
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    callq _foo
-; AVX512BW-NEXT:    popq %rax
+; AVX512BW-NEXT:    addq $8, %rsp
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512DQ-LABEL: ktest_7:
 ; AVX512DQ:       ## %bb.0:
-; AVX512DQ-NEXT:    pushq %rax
-; AVX512DQ-NEXT:    .cfi_def_cfa_offset 16
 ; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm4
 ; AVX512DQ-NEXT:    vpxor %xmm5, %xmm5, %xmm5
 ; AVX512DQ-NEXT:    vpcmpeqb %ymm5, %ymm4, %ymm4
@@ -5102,19 +5072,18 @@ define void @ktest_7(<64 x i8> %w, <64 x i8> %x, <64 x i8> %y, <64 x i8> %z) {
 ; AVX512DQ-NEXT:    orq %rax, %rcx
 ; AVX512DQ-NEXT:    je LBB78_1
 ; AVX512DQ-NEXT:  ## %bb.2: ## %exit
-; AVX512DQ-NEXT:    popq %rax
 ; AVX512DQ-NEXT:    vzeroupper
 ; AVX512DQ-NEXT:    retq
 ; AVX512DQ-NEXT:  LBB78_1: ## %bar
+; AVX512DQ-NEXT:    pushq %rax
+; AVX512DQ-NEXT:    .cfi_def_cfa_offset 16
 ; AVX512DQ-NEXT:    vzeroupper
 ; AVX512DQ-NEXT:    callq _foo
-; AVX512DQ-NEXT:    popq %rax
+; AVX512DQ-NEXT:    addq $8, %rsp
 ; AVX512DQ-NEXT:    retq
 ;
 ; X86-LABEL: ktest_7:
 ; X86:       ## %bb.0:
-; X86-NEXT:    subl $12, %esp
-; X86-NEXT:    .cfi_def_cfa_offset 16
 ; X86-NEXT:    vptestnmb %zmm0, %zmm0, %k0
 ; X86-NEXT:    vptestnmb %zmm1, %zmm1, %k1
 ; X86-NEXT:    korq %k1, %k0, %k0
@@ -5126,10 +5095,11 @@ define void @ktest_7(<64 x i8> %w, <64 x i8> %x, <64 x i8> %y, <64 x i8> %z) {
 ; X86-NEXT:    kortestd %k1, %k0
 ; X86-NEXT:    je LBB78_1
 ; X86-NEXT:  ## %bb.2: ## %exit
-; X86-NEXT:    addl $12, %esp
 ; X86-NEXT:    vzeroupper
 ; X86-NEXT:    retl
 ; X86-NEXT:  LBB78_1: ## %bar
+; X86-NEXT:    subl $12, %esp
+; X86-NEXT:    .cfi_def_cfa_offset 16
 ; X86-NEXT:    vzeroupper
 ; X86-NEXT:    calll _foo
 ; X86-NEXT:    addl $12, %esp

diff  --git a/llvm/test/CodeGen/X86/live-out-reg-info.ll b/llvm/test/CodeGen/X86/live-out-reg-info.ll
index 9b6b35d2f6b7..461cc1dc6fdb 100644
--- a/llvm/test/CodeGen/X86/live-out-reg-info.ll
+++ b/llvm/test/CodeGen/X86/live-out-reg-info.ll
@@ -9,16 +9,16 @@ declare void @qux()
 define void @foo(i32 %a) {
 ; CHECK-LABEL: foo:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pushq %rax
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    shrl $23, %edi
 ; CHECK-NEXT:    testl $256, %edi # imm = 0x100
 ; CHECK-NEXT:    jne .LBB0_2
 ; CHECK-NEXT:  # %bb.1: # %true
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    callq qux
-; CHECK-NEXT:  .LBB0_2: # %false
 ; CHECK-NEXT:    popq %rax
 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:  .LBB0_2: # %false
 ; CHECK-NEXT:    retq
   %t0 = lshr i32 %a, 23
   br label %next

diff  --git a/llvm/test/CodeGen/X86/swifterror.ll b/llvm/test/CodeGen/X86/swifterror.ll
index 8877cd410859..ffb896926738 100644
--- a/llvm/test/CodeGen/X86/swifterror.ll
+++ b/llvm/test/CodeGen/X86/swifterror.ll
@@ -419,16 +419,15 @@ entry:
 
 define swiftcc float @conditionally_forward_swifterror(%swift_error** swifterror %error_ptr_ref, i32 %cc) {
 ; CHECK-APPLE-LABEL: conditionally_forward_swifterror:
-; CHECK-APPLE:  pushq %rax
 ; CHECK-APPLE:	testl %edi, %edi
 ; CHECK-APPLE:  je
 
+; CHECK-APPLE:  pushq %rax
 ; CHECK-APPLE:  callq _moo
 ; CHECK-APPLE:  popq %rax
 ; CHECK-APPLE:  retq
 
 ; CHECK-APPLE:  xorps %xmm0, %xmm0
-; CHECK-APPLE:  popq %rax
 ; CHECK-APPLE:  retq
 
 ; CHECK-O0-LABEL: conditionally_forward_swifterror:
@@ -488,13 +487,14 @@ entry:
 
 ; Make sure we can handle the case when isel generates new machine basic blocks.
 ; CHECK-APPLE-LABEL: dont_crash_on_new_isel_blocks:
-; CHECK-APPLE: pushq   %rax
 ; CHECK-APPLE: xorl    %eax, %eax
 ; CHECK-APPLE: testb   %al, %al
 ; CHECK-APPLE: jne
-; CHECK-APPLE: callq   *%rax
-; CHECK-APPLE: popq    %rax
-; CHECK-APPLE: ret
+; CHECK-APPLE:         pushq   %rax
+; CHECK-APPLE-NEXT:  .cfi_def_cfa_offset 16
+; CHECK-APPLE-NEXT:    callq   *%rax
+; CHECK-APPLE-NEXT:    popq    %rax
+; CHECK-APPLE-NEXT:    ret
 
 define swiftcc void @dont_crash_on_new_isel_blocks(%swift_error** nocapture swifterror, i1, i8**) {
 entry:

diff  --git a/llvm/test/CodeGen/X86/test-vs-bittest.ll b/llvm/test/CodeGen/X86/test-vs-bittest.ll
index 38ff7a40aa6d..6c6282ce1ee0 100644
--- a/llvm/test/CodeGen/X86/test-vs-bittest.ll
+++ b/llvm/test/CodeGen/X86/test-vs-bittest.ll
@@ -4,15 +4,15 @@
 define void @test64(i64 inreg %x) {
 ; CHECK-LABEL: test64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pushq %rax
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    testl $2048, %edi # imm = 0x800
 ; CHECK-NEXT:    jne .LBB0_2
 ; CHECK-NEXT:  # %bb.1: # %yes
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    callq bar
-; CHECK-NEXT:  .LBB0_2: # %no
 ; CHECK-NEXT:    popq %rax
 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:  .LBB0_2: # %no
 ; CHECK-NEXT:    retq
   %t = and i64 %x, 2048
   %s = icmp eq i64 %t, 0
@@ -28,15 +28,15 @@ no:
 define void @test64_optsize(i64 inreg %x) optsize {
 ; CHECK-LABEL: test64_optsize:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pushq %rax
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    btl $11, %edi
 ; CHECK-NEXT:    jb .LBB1_2
 ; CHECK-NEXT:  # %bb.1: # %yes
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    callq bar
-; CHECK-NEXT:  .LBB1_2: # %no
 ; CHECK-NEXT:    popq %rax
 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:  .LBB1_2: # %no
 ; CHECK-NEXT:    retq
   %t = and i64 %x, 2048
   %s = icmp eq i64 %t, 0
@@ -52,15 +52,15 @@ no:
 define void @test64_pgso(i64 inreg %x) !prof !14 {
 ; CHECK-LABEL: test64_pgso:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pushq %rax
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    btl $11, %edi
 ; CHECK-NEXT:    jb .LBB2_2
 ; CHECK-NEXT:  # %bb.1: # %yes
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    callq bar
-; CHECK-NEXT:  .LBB2_2: # %no
 ; CHECK-NEXT:    popq %rax
 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:  .LBB2_2: # %no
 ; CHECK-NEXT:    retq
   %t = and i64 %x, 2048
   %s = icmp eq i64 %t, 0
@@ -81,15 +81,15 @@ no:
 define void @test64_2(i64 inreg %x) {
 ; CHECK-LABEL: test64_2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pushq %rax
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    testl $2048, %edi # imm = 0x800
 ; CHECK-NEXT:    je .LBB3_2
 ; CHECK-NEXT:  # %bb.1: # %yes
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    callq bar
-; CHECK-NEXT:  .LBB3_2: # %no
 ; CHECK-NEXT:    popq %rax
 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:  .LBB3_2: # %no
 ; CHECK-NEXT:    retq
   %t = and i64 %x, 2048
   %s = icmp eq i64 %t, 0
@@ -105,15 +105,15 @@ no:
 define void @test64_optsize_2(i64 inreg %x) optsize {
 ; CHECK-LABEL: test64_optsize_2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pushq %rax
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    btl $11, %edi
 ; CHECK-NEXT:    jae .LBB4_2
 ; CHECK-NEXT:  # %bb.1: # %yes
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    callq bar
-; CHECK-NEXT:  .LBB4_2: # %no
 ; CHECK-NEXT:    popq %rax
 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:  .LBB4_2: # %no
 ; CHECK-NEXT:    retq
   %t = and i64 %x, 2048
   %s = icmp eq i64 %t, 0
@@ -129,15 +129,15 @@ no:
 define void @test64_pgso_2(i64 inreg %x) !prof !14 {
 ; CHECK-LABEL: test64_pgso_2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pushq %rax
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    btl $11, %edi
 ; CHECK-NEXT:    jae .LBB5_2
 ; CHECK-NEXT:  # %bb.1: # %yes
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    callq bar
-; CHECK-NEXT:  .LBB5_2: # %no
 ; CHECK-NEXT:    popq %rax
 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:  .LBB5_2: # %no
 ; CHECK-NEXT:    retq
   %t = and i64 %x, 2048
   %s = icmp eq i64 %t, 0
@@ -153,15 +153,15 @@ no:
 define void @test64_3(i64 inreg %x) {
 ; CHECK-LABEL: test64_3:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pushq %rax
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    btq $32, %rdi
 ; CHECK-NEXT:    jb .LBB6_2
 ; CHECK-NEXT:  # %bb.1: # %yes
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    callq bar
-; CHECK-NEXT:  .LBB6_2: # %no
 ; CHECK-NEXT:    popq %rax
 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:  .LBB6_2: # %no
 ; CHECK-NEXT:    retq
   %t = and i64 %x, 4294967296
   %s = icmp eq i64 %t, 0
@@ -177,15 +177,15 @@ no:
 define void @test64_optsize_3(i64 inreg %x) optsize {
 ; CHECK-LABEL: test64_optsize_3:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pushq %rax
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    btq $32, %rdi
 ; CHECK-NEXT:    jb .LBB7_2
 ; CHECK-NEXT:  # %bb.1: # %yes
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    callq bar
-; CHECK-NEXT:  .LBB7_2: # %no
 ; CHECK-NEXT:    popq %rax
 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:  .LBB7_2: # %no
 ; CHECK-NEXT:    retq
   %t = and i64 %x, 4294967296
   %s = icmp eq i64 %t, 0
@@ -201,15 +201,15 @@ no:
 define void @test64_pgso_3(i64 inreg %x) !prof !14 {
 ; CHECK-LABEL: test64_pgso_3:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pushq %rax
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    btq $32, %rdi
 ; CHECK-NEXT:    jb .LBB8_2
 ; CHECK-NEXT:  # %bb.1: # %yes
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    callq bar
-; CHECK-NEXT:  .LBB8_2: # %no
 ; CHECK-NEXT:    popq %rax
 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:  .LBB8_2: # %no
 ; CHECK-NEXT:    retq
   %t = and i64 %x, 4294967296
   %s = icmp eq i64 %t, 0
@@ -225,15 +225,15 @@ no:
 define void @test64_4(i64 inreg %x) {
 ; CHECK-LABEL: test64_4:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pushq %rax
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    btq $32, %rdi
 ; CHECK-NEXT:    jae .LBB9_2
 ; CHECK-NEXT:  # %bb.1: # %yes
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    callq bar
-; CHECK-NEXT:  .LBB9_2: # %no
 ; CHECK-NEXT:    popq %rax
 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:  .LBB9_2: # %no
 ; CHECK-NEXT:    retq
   %t = and i64 %x, 4294967296
   %s = icmp eq i64 %t, 0
@@ -249,15 +249,15 @@ no:
 define void @test64_optsize_4(i64 inreg %x) optsize {
 ; CHECK-LABEL: test64_optsize_4:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pushq %rax
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    btq $32, %rdi
 ; CHECK-NEXT:    jae .LBB10_2
 ; CHECK-NEXT:  # %bb.1: # %yes
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    callq bar
-; CHECK-NEXT:  .LBB10_2: # %no
 ; CHECK-NEXT:    popq %rax
 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:  .LBB10_2: # %no
 ; CHECK-NEXT:    retq
   %t = and i64 %x, 4294967296
   %s = icmp eq i64 %t, 0
@@ -273,15 +273,15 @@ no:
 define void @test64_pgso_4(i64 inreg %x) !prof !14 {
 ; CHECK-LABEL: test64_pgso_4:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pushq %rax
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    btq $32, %rdi
 ; CHECK-NEXT:    jae .LBB11_2
 ; CHECK-NEXT:  # %bb.1: # %yes
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    callq bar
-; CHECK-NEXT:  .LBB11_2: # %no
 ; CHECK-NEXT:    popq %rax
 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:  .LBB11_2: # %no
 ; CHECK-NEXT:    retq
   %t = and i64 %x, 4294967296
   %s = icmp eq i64 %t, 0
@@ -297,15 +297,15 @@ no:
 define void @test32(i32 inreg %x) {
 ; CHECK-LABEL: test32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pushq %rax
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    testl $2048, %edi # imm = 0x800
 ; CHECK-NEXT:    jne .LBB12_2
 ; CHECK-NEXT:  # %bb.1: # %yes
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    callq bar
-; CHECK-NEXT:  .LBB12_2: # %no
 ; CHECK-NEXT:    popq %rax
 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:  .LBB12_2: # %no
 ; CHECK-NEXT:    retq
   %t = and i32 %x, 2048
   %s = icmp eq i32 %t, 0
@@ -321,15 +321,15 @@ no:
 define void @test32_optsize(i32 inreg %x) optsize {
 ; CHECK-LABEL: test32_optsize:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pushq %rax
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    btl $11, %edi
 ; CHECK-NEXT:    jb .LBB13_2
 ; CHECK-NEXT:  # %bb.1: # %yes
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    callq bar
-; CHECK-NEXT:  .LBB13_2: # %no
 ; CHECK-NEXT:    popq %rax
 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:  .LBB13_2: # %no
 ; CHECK-NEXT:    retq
   %t = and i32 %x, 2048
   %s = icmp eq i32 %t, 0
@@ -345,15 +345,15 @@ no:
 define void @test32_2(i32 inreg %x) {
 ; CHECK-LABEL: test32_2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pushq %rax
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    testl $2048, %edi # imm = 0x800
 ; CHECK-NEXT:    je .LBB14_2
 ; CHECK-NEXT:  # %bb.1: # %yes
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    callq bar
-; CHECK-NEXT:  .LBB14_2: # %no
 ; CHECK-NEXT:    popq %rax
 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:  .LBB14_2: # %no
 ; CHECK-NEXT:    retq
   %t = and i32 %x, 2048
   %s = icmp eq i32 %t, 0
@@ -369,15 +369,15 @@ no:
 define void @test32_optsize_2(i32 inreg %x) optsize {
 ; CHECK-LABEL: test32_optsize_2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pushq %rax
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    btl $11, %edi
 ; CHECK-NEXT:    jae .LBB15_2
 ; CHECK-NEXT:  # %bb.1: # %yes
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    callq bar
-; CHECK-NEXT:  .LBB15_2: # %no
 ; CHECK-NEXT:    popq %rax
 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:  .LBB15_2: # %no
 ; CHECK-NEXT:    retq
   %t = and i32 %x, 2048
   %s = icmp eq i32 %t, 0
@@ -393,15 +393,15 @@ no:
 define void @test32_pgso_2(i32 inreg %x) !prof !14 {
 ; CHECK-LABEL: test32_pgso_2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pushq %rax
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    btl $11, %edi
 ; CHECK-NEXT:    jae .LBB16_2
 ; CHECK-NEXT:  # %bb.1: # %yes
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    callq bar
-; CHECK-NEXT:  .LBB16_2: # %no
 ; CHECK-NEXT:    popq %rax
 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:  .LBB16_2: # %no
 ; CHECK-NEXT:    retq
   %t = and i32 %x, 2048
   %s = icmp eq i32 %t, 0
@@ -417,15 +417,15 @@ no:
 define void @test16(i16 inreg %x) {
 ; CHECK-LABEL: test16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pushq %rax
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    testl $2048, %edi # imm = 0x800
 ; CHECK-NEXT:    jne .LBB17_2
 ; CHECK-NEXT:  # %bb.1: # %yes
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    callq bar
-; CHECK-NEXT:  .LBB17_2: # %no
 ; CHECK-NEXT:    popq %rax
 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:  .LBB17_2: # %no
 ; CHECK-NEXT:    retq
   %t = and i16 %x, 2048
   %s = icmp eq i16 %t, 0
@@ -441,15 +441,15 @@ no:
 define void @test16_optsize(i16 inreg %x) optsize {
 ; CHECK-LABEL: test16_optsize:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pushq %rax
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    btl $11, %edi
 ; CHECK-NEXT:    jb .LBB18_2
 ; CHECK-NEXT:  # %bb.1: # %yes
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    callq bar
-; CHECK-NEXT:  .LBB18_2: # %no
 ; CHECK-NEXT:    popq %rax
 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:  .LBB18_2: # %no
 ; CHECK-NEXT:    retq
   %t = and i16 %x, 2048
   %s = icmp eq i16 %t, 0
@@ -465,15 +465,15 @@ no:
 define void @test16_pgso(i16 inreg %x) !prof !14 {
 ; CHECK-LABEL: test16_pgso:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pushq %rax
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    btl $11, %edi
 ; CHECK-NEXT:    jb .LBB19_2
 ; CHECK-NEXT:  # %bb.1: # %yes
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    callq bar
-; CHECK-NEXT:  .LBB19_2: # %no
 ; CHECK-NEXT:    popq %rax
 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:  .LBB19_2: # %no
 ; CHECK-NEXT:    retq
   %t = and i16 %x, 2048
   %s = icmp eq i16 %t, 0
@@ -489,15 +489,15 @@ no:
 define void @test16_2(i16 inreg %x) {
 ; CHECK-LABEL: test16_2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pushq %rax
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    testl $2048, %edi # imm = 0x800
 ; CHECK-NEXT:    je .LBB20_2
 ; CHECK-NEXT:  # %bb.1: # %yes
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    callq bar
-; CHECK-NEXT:  .LBB20_2: # %no
 ; CHECK-NEXT:    popq %rax
 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:  .LBB20_2: # %no
 ; CHECK-NEXT:    retq
   %t = and i16 %x, 2048
   %s = icmp eq i16 %t, 0
@@ -513,15 +513,15 @@ no:
 define void @test16_optsize_2(i16 inreg %x) optsize {
 ; CHECK-LABEL: test16_optsize_2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pushq %rax
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    btl $11, %edi
 ; CHECK-NEXT:    jae .LBB21_2
 ; CHECK-NEXT:  # %bb.1: # %yes
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    callq bar
-; CHECK-NEXT:  .LBB21_2: # %no
 ; CHECK-NEXT:    popq %rax
 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:  .LBB21_2: # %no
 ; CHECK-NEXT:    retq
   %t = and i16 %x, 2048
   %s = icmp eq i16 %t, 0
@@ -537,15 +537,15 @@ no:
 define void @test16_pgso_2(i16 inreg %x) !prof !14 {
 ; CHECK-LABEL: test16_pgso_2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pushq %rax
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    btl $11, %edi
 ; CHECK-NEXT:    jae .LBB22_2
 ; CHECK-NEXT:  # %bb.1: # %yes
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    callq bar
-; CHECK-NEXT:  .LBB22_2: # %no
 ; CHECK-NEXT:    popq %rax
 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:  .LBB22_2: # %no
 ; CHECK-NEXT:    retq
   %t = and i16 %x, 2048
   %s = icmp eq i16 %t, 0

diff  --git a/llvm/test/CodeGen/X86/x86-shrink-wrap-unwind.ll b/llvm/test/CodeGen/X86/x86-shrink-wrap-unwind.ll
index a73c06d97a1f..f3d4cf8858ee 100644
--- a/llvm/test/CodeGen/X86/x86-shrink-wrap-unwind.ll
+++ b/llvm/test/CodeGen/X86/x86-shrink-wrap-unwind.ll
@@ -1,17 +1,14 @@
-; RUN: llc %s -o - | FileCheck %s
+; RUN: llc -mtriple=x86_64-apple-darwin10.6 < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-linux < %s | FileCheck %s --check-prefix=NOCOMPACTUNWIND
 ;
 ; Note: This test cannot be merged with the shrink-wrapping tests
 ; because the booleans set on the command line take precedence on
 ; the target logic that disable shrink-wrapping.
-target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
-target triple = "x86_64-apple-macosx"
 
-
-; This test checks that we do not use shrink-wrapping when
-; the function does not have any frame pointer and may unwind.
-; This is a workaround for a limitation in the emission of
-; the CFI directives, that are not correct in such case.
-; PR25614
+; The current compact unwind scheme does not work when the prologue is not at
+; the start (the instructions before the prologue cannot be described), so on
+; compact unwind platforms we do not shrink-wrap functions that have no frame
+; pointer and are not marked nounwind. See PR25614.
 ;
 ; No shrink-wrapping should occur here, until the CFI information are fixed.
 ; CHECK-LABEL: framelessUnwind:
@@ -41,6 +38,12 @@ target triple = "x86_64-apple-macosx"
 ; CHECK-NEXT: popq
 ;
 ; CHECK-NEXT: retq
+
+; On a platform that does not support compact unwind, shrink-wrapping is enabled.
+; NOCOMPACTUNWIND-LABEL: framelessUnwind:
+; NOCOMPACTUNWIND-NOT:     pushq
+; NOCOMPACTUNWIND:       # %bb.1:
+; NOCOMPACTUNWIND-NEXT:    pushq %rax
 define i32 @framelessUnwind(i32 %a, i32 %b) #0 {
   %tmp = alloca i32, align 4
   %tmp2 = icmp slt i32 %a, %b


        


More information about the llvm-commits mailing list