[llvm] fold mov dec/inc to lea +- 1 (PR #185194)

Takashi Idobe via llvm-commits llvm-commits at lists.llvm.org
Sun Mar 8 11:12:51 PDT 2026


https://github.com/Takashiidobe updated https://github.com/llvm/llvm-project/pull/185194

>From 05c6bc13b71ad890fb10d435a3007358f8355a88 Mon Sep 17 00:00:00 2001
From: Takashiidobe <idobetakashi at gmail.com>
Date: Sat, 7 Mar 2026 09:09:35 -0500
Subject: [PATCH 1/4] fuse mov + inc/dec pattern into just lea reg [reg +- 1]

---
 llvm/lib/Target/X86/X86FixupLEAs.cpp | 90 ++++++++++++++++++++++++++++
 1 file changed, 90 insertions(+)

diff --git a/llvm/lib/Target/X86/X86FixupLEAs.cpp b/llvm/lib/Target/X86/X86FixupLEAs.cpp
index 07f656fc5ccfd..2837968b819cc 100644
--- a/llvm/lib/Target/X86/X86FixupLEAs.cpp
+++ b/llvm/lib/Target/X86/X86FixupLEAs.cpp
@@ -12,6 +12,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "MCTargetDesc/X86BaseInfo.h"
 #include "X86.h"
 #include "X86InstrInfo.h"
 #include "X86Subtarget.h"
@@ -120,6 +121,13 @@ class FixupLEAsImpl {
   MachineInstr *postRAConvertToLEA(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator &MBBI) const;
 
+  /// Fold adjacent mov/inc-dec into a single LEA:
+  ///   mov dst, src
+  ///   dec/inc dst   (flags dead)
+  /// =>
+  ///   lea dst, [src +/- 1]
+  bool foldMovIncDecToLEA(MachineBasicBlock &MBB, const X86Subtarget &ST) const;
+
 public:
   FixupLEAsImpl(ProfileSummaryInfo *PSI, MachineBlockFrequencyInfo *MBFI)
       : PSI(PSI), MBFI(MBFI) {}
@@ -229,6 +237,16 @@ static bool isLEA(unsigned Opcode) {
          Opcode == X86::LEA64_32r;
 }
 
+static MachineBasicBlock::iterator
+getPrevNonDebugInstr(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) {
+  while (I != MBB.begin()) {
+    --I;
+    if (!I->isDebugInstr())
+      return I;
+  }
+  return MBB.end();
+}
+
 bool FixupLEAsImpl::runOnMachineFunction(MachineFunction &MF) {
   const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
   bool IsSlowLEA = ST.slowLEA();
@@ -244,6 +262,8 @@ bool FixupLEAsImpl::runOnMachineFunction(MachineFunction &MF) {
 
   LLVM_DEBUG(dbgs() << "Start X86FixupLEAs\n";);
   for (MachineBasicBlock &MBB : MF) {
+    foldMovIncDecToLEA(MBB, ST);
+
     // First pass. Try to remove or optimize existing LEAs.
     bool OptIncDecPerBB =
         OptIncDec || llvm::shouldOptimizeForSize(&MBB, PSI, MBFI);
@@ -273,6 +293,76 @@ bool FixupLEAsImpl::runOnMachineFunction(MachineFunction &MF) {
   return true;
 }
 
+bool FixupLEAsImpl::foldMovIncDecToLEA(MachineBasicBlock &MBB,
+                                       const X86Subtarget &ST) const {
+  bool Changed = false;
+
+  for (auto I = MBB.begin(); I != MBB.end();) {
+    MachineInstr &MI = *I;
+    unsigned Opc = MI.getOpcode();
+
+    bool IsDec = Opc == X86::DEC32r || Opc == X86::DEC64r;
+    bool IsInc = Opc == X86::INC32r || Opc == X86::INC64r;
+    bool Is64BitIncDec = Opc == X86::DEC64r || Opc == X86::INC64r;
+    if (!IsDec && !IsInc) {
+      ++I;
+      continue;
+    }
+
+    if (!MI.registerDefIsDead(X86::EFLAGS, TRI)) {
+      ++I;
+      continue;
+    }
+
+    Register DstReg = MI.getOperand(0).getReg();
+    if (!DstReg.isPhysical() || MI.getOperand(1).getReg() != DstReg) {
+      ++I;
+      continue;
+    }
+
+    auto Prev = getPrevNonDebugInstr(MBB, I);
+    if (Prev == MBB.end()) {
+      ++I;
+      continue;
+    }
+
+    unsigned MovOpc = Is64BitIncDec ? X86::MOV64rr : X86::MOV32rr;
+    if (Prev->getOpcode() != MovOpc || Prev->getOperand(0).getReg() != DstReg) {
+      ++I;
+      continue;
+    }
+
+    Register SrcReg = Prev->getOperand(1).getReg();
+    Register LEASrcReg = SrcReg;
+    unsigned LEAOpc = X86::LEA32r;
+    if (Is64BitIncDec) {
+      LEAOpc = X86::LEA64r;
+    } else if (ST.is64Bit()) {
+      LEAOpc = X86::LEA64_32r;
+      LEASrcReg = getX86SubSuperRegister(SrcReg, 64);
+    }
+
+    MachineInstr *NewMI =
+        BuildMI(MBB, I, MI.getDebugLoc(), TII->get(LEAOpc), DstReg)
+            .addReg(LEASrcReg)
+            .addImm(1)
+            .addReg(0)
+            .addImm(IsDec ? -1 : 1)
+            .addReg(0);
+
+    ++NumLEAs;
+    Changed = true;
+    MBB.getParent()->substituteDebugValuesForInst(MI, *NewMI, 1);
+    MBB.getParent()->substituteDebugValuesForInst(*Prev, *NewMI, 1);
+
+    auto EraseIncDec = I++;
+    MBB.erase(EraseIncDec);
+    MBB.erase(Prev);
+  }
+
+  return Changed;
+}
+
 FixupLEAsImpl::RegUsageState
 FixupLEAsImpl::usesRegister(MachineOperand &p, MachineBasicBlock::iterator I) {
   RegUsageState RegUsage = RU_NotUsed;

>From fd0e32fef9ec7f14e35d6027eba68898559f1ca3 Mon Sep 17 00:00:00 2001
From: Takashiidobe <idobetakashi at gmail.com>
Date: Sat, 7 Mar 2026 09:19:24 -0500
Subject: [PATCH 2/4] Update test checks for mov+inc/dec to lea fold

---
 .../CodeGen/X86/AMX/amx-ldtilecfg-insert.ll   |   3 +-
 llvm/test/CodeGen/X86/fold-loop-of-urem.ll    |   6 +-
 llvm/test/CodeGen/X86/mov-inc-dec-to-lea.ll   | 100 ++++++++++++++++++
 llvm/test/CodeGen/X86/pr44412.ll              |   6 +-
 4 files changed, 105 insertions(+), 10 deletions(-)
 create mode 100644 llvm/test/CodeGen/X86/mov-inc-dec-to-lea.ll

diff --git a/llvm/test/CodeGen/X86/AMX/amx-ldtilecfg-insert.ll b/llvm/test/CodeGen/X86/AMX/amx-ldtilecfg-insert.ll
index 8a8e7a3b4df2c..abff79a5b232d 100644
--- a/llvm/test/CodeGen/X86/AMX/amx-ldtilecfg-insert.ll
+++ b/llvm/test/CodeGen/X86/AMX/amx-ldtilecfg-insert.ll
@@ -238,8 +238,7 @@ define dso_local void @test5(i16 signext %0, i16 signext %1) nounwind {
 ; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    movl $buf, %ecx
 ; CHECK-NEXT:    movl $32, %edx
-; CHECK-NEXT:    movl %esi, %r8d
-; CHECK-NEXT:    decl %r8d
+; CHECK-NEXT:    leal -1(%rsi), %r8d
 ; CHECK-NEXT:    jmp .LBB4_1
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  .LBB4_3: # %if.false
diff --git a/llvm/test/CodeGen/X86/fold-loop-of-urem.ll b/llvm/test/CodeGen/X86/fold-loop-of-urem.ll
index cb1c078ee5129..37cb39dac265e 100644
--- a/llvm/test/CodeGen/X86/fold-loop-of-urem.ll
+++ b/llvm/test/CodeGen/X86/fold-loop-of-urem.ll
@@ -389,8 +389,7 @@ define void @simple_urem_fail_bad_incr3(i32 %N, i32 %rem_amt) nounwind {
 ; CHECK-NEXT:    testb $1, %r14b
 ; CHECK-NEXT:    je .LBB5_7
 ; CHECK-NEXT:  # %bb.4: # in Loop: Header=BB5_2 Depth=1
-; CHECK-NEXT:    movl %eax, %ebp
-; CHECK-NEXT:    incl %ebp
+; CHECK-NEXT:    leal 1(%rax), %ebp
 ; CHECK-NEXT:    jmp .LBB5_6
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  .LBB5_5: # %for.body2
@@ -901,8 +900,7 @@ define void @simple_urem_multi_latch_non_canonical(i32 %N, i32 %rem_amt) nounwin
 ; CHECK-NEXT:    pushq %rbx
 ; CHECK-NEXT:    pushq %rax
 ; CHECK-NEXT:    movl %esi, %ebx
-; CHECK-NEXT:    movl %edi, %ebp
-; CHECK-NEXT:    decl %ebp
+; CHECK-NEXT:    leal -1(%rdi), %ebp
 ; CHECK-NEXT:    xorl %r12d, %r12d
 ; CHECK-NEXT:    xorl %r14d, %r14d
 ; CHECK-NEXT:    xorl %r13d, %r13d
diff --git a/llvm/test/CodeGen/X86/mov-inc-dec-to-lea.ll b/llvm/test/CodeGen/X86/mov-inc-dec-to-lea.ll
new file mode 100644
index 0000000000000..4690fbd0cb5c3
--- /dev/null
+++ b/llvm/test/CodeGen/X86/mov-inc-dec-to-lea.ll
@@ -0,0 +1,100 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 | FileCheck %s
+
+define i64 @mov_dec(<32 x i8> %x) local_unnamed_addr {
+; CHECK-LABEL: mov_dec:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpmovmskb %ymm0, %ecx
+; CHECK-NEXT:    leal -1(%rcx), %eax
+; CHECK-NEXT:    shlq $32, %rcx
+; CHECK-NEXT:    orq %rcx, %rax
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %cmp = icmp slt <32 x i8> %x, zeroinitializer
+  %mvmsk = bitcast <32 x i1> %cmp to i32
+  %dec = add i32 %mvmsk, -1
+  %ext = zext i32 %mvmsk to i64
+  %shl = shl nuw i64 %ext, 32
+  %dec.ext = zext i32 %dec to i64
+  %res = or disjoint i64 %shl, %dec.ext
+  ret i64 %res
+}
+
+define i64 @mov_inc(<32 x i8> %x) local_unnamed_addr {
+; CHECK-LABEL: mov_inc:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpmovmskb %ymm0, %ecx
+; CHECK-NEXT:    leal 1(%rcx), %eax
+; CHECK-NEXT:    shlq $32, %rcx
+; CHECK-NEXT:    orq %rcx, %rax
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %cmp = icmp slt <32 x i8> %x, zeroinitializer
+  %mvmsk = bitcast <32 x i1> %cmp to i32
+  %inc = add i32 %mvmsk, 1
+  %ext = zext i32 %mvmsk to i64
+  %shl = shl nuw i64 %ext, 32
+  %inc.ext = zext i32 %inc to i64
+  %res = or disjoint i64 %shl, %inc.ext
+  ret i64 %res
+}
+
+define i64 @mov_inc_flags_live(<32 x i8> %x) local_unnamed_addr {
+; CHECK-LABEL: mov_inc_flags_live:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpmovmskb %ymm0, %ecx
+; CHECK-NEXT:    movl %ecx, %eax
+; CHECK-NEXT:    incl %eax
+; CHECK-NEXT:    cmovneq %rcx, %rax
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %cmp = icmp slt <32 x i8> %x, zeroinitializer
+  %mvmsk = bitcast <32 x i1> %cmp to i32
+  %inc = add i32 %mvmsk, 1
+  %iszero = icmp eq i32 %inc, 0
+  %ext = zext i32 %mvmsk to i64
+  %inc.ext = zext i32 %inc to i64
+  %sel = select i1 %iszero, i64 %inc.ext, i64 %ext
+  ret i64 %sel
+}
+
+define i64 @mov_dec_flags_live(<32 x i8> %x) local_unnamed_addr {
+; CHECK-LABEL: mov_dec_flags_live:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpmovmskb %ymm0, %ecx
+; CHECK-NEXT:    movl %ecx, %eax
+; CHECK-NEXT:    decl %eax
+; CHECK-NEXT:    cmovneq %rcx, %rax
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %cmp = icmp slt <32 x i8> %x, zeroinitializer
+  %mvmsk = bitcast <32 x i1> %cmp to i32
+  %dec = add i32 %mvmsk, -1
+  %iszero = icmp eq i32 %dec, 0
+  %ext = zext i32 %mvmsk to i64
+  %dec.ext = zext i32 %dec to i64
+  %sel = select i1 %iszero, i64 %dec.ext, i64 %ext
+  ret i64 %sel
+}
+
+define i64 @mov_inc_not_adjacent(<32 x i8> %x) local_unnamed_addr {
+; CHECK-LABEL: mov_inc_not_adjacent:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpmovmskb %ymm0, %eax
+; CHECK-NEXT:    movq %rax, %rcx
+; CHECK-NEXT:    shlq $32, %rcx
+; CHECK-NEXT:    incl %eax
+; CHECK-NEXT:    orq %rcx, %rax
+; CHECK-NEXT:    xorq $5, %rax
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %cmp = icmp slt <32 x i8> %x, zeroinitializer
+  %mvmsk = bitcast <32 x i1> %cmp to i32
+  %ext = zext i32 %mvmsk to i64
+  %shl = shl nuw i64 %ext, 32
+  %tmp = xor i64 %shl, 5
+  %inc = add i32 %mvmsk, 1
+  %inc.ext = zext i32 %inc to i64
+  %res = xor i64 %tmp, %inc.ext
+  ret i64 %res
+}
diff --git a/llvm/test/CodeGen/X86/pr44412.ll b/llvm/test/CodeGen/X86/pr44412.ll
index 546dbcc156129..331d9f0e7d45e 100644
--- a/llvm/test/CodeGen/X86/pr44412.ll
+++ b/llvm/test/CodeGen/X86/pr44412.ll
@@ -8,8 +8,7 @@ define void @bar(i32 %0, i32 %1) nounwind {
 ; CHECK-NEXT:    je .LBB0_4
 ; CHECK-NEXT:  # %bb.1: # %.preheader
 ; CHECK-NEXT:    pushq %rbx
-; CHECK-NEXT:    movl %edi, %ebx
-; CHECK-NEXT:    decl %ebx
+; CHECK-NEXT:    leal -1(%rdi), %ebx
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  .LBB0_2: # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    movl %ebx, %edi
@@ -41,8 +40,7 @@ define void @baz(i32 %0, i32 %1) nounwind {
 ; CHECK-NEXT:    je .LBB1_4
 ; CHECK-NEXT:  # %bb.1: # %.preheader
 ; CHECK-NEXT:    pushq %rbx
-; CHECK-NEXT:    movl %edi, %ebx
-; CHECK-NEXT:    decl %ebx
+; CHECK-NEXT:    leal -1(%rdi), %ebx
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  .LBB1_2: # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    movl %ebx, %edi

>From 0c25eb25122725fff4bbc22f0a0d2390ffabea76 Mon Sep 17 00:00:00 2001
From: Takashiidobe <idobetakashi at gmail.com>
Date: Sun, 8 Mar 2026 09:39:24 -0400
Subject: [PATCH 3/4] add +slow-3ops-lea and -slow-3ops-lea RUN lines to the
 test file to check whether the generated code differs

---
 llvm/test/CodeGen/X86/mov-inc-dec-to-lea.ll | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/test/CodeGen/X86/mov-inc-dec-to-lea.ll b/llvm/test/CodeGen/X86/mov-inc-dec-to-lea.ll
index 4690fbd0cb5c3..6fcc2371f2024 100644
--- a/llvm/test/CodeGen/X86/mov-inc-dec-to-lea.ll
+++ b/llvm/test/CodeGen/X86/mov-inc-dec-to-lea.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
-; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2,+slow-3ops-lea | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2,-slow-3ops-lea | FileCheck %s
 
 define i64 @mov_dec(<32 x i8> %x) local_unnamed_addr {
 ; CHECK-LABEL: mov_dec:

>From d4b76999e5ca7793c7d56941978521919710ef81 Mon Sep 17 00:00:00 2001
From: Takashiidobe <idobetakashi at gmail.com>
Date: Sun, 8 Mar 2026 14:12:40 -0400
Subject: [PATCH 4/4] remove the negative tests from asm generation and move
 them to MIR tests that run only the X86 fixup-LEAs pass

---
 .../CodeGen/MIR/X86/mov-inc-dec-to-lea.mir    | 78 +++++++++++++++++++
 llvm/test/CodeGen/X86/mov-inc-dec-to-lea.ll   | 64 +--------------
 2 files changed, 80 insertions(+), 62 deletions(-)
 create mode 100644 llvm/test/CodeGen/MIR/X86/mov-inc-dec-to-lea.mir

diff --git a/llvm/test/CodeGen/MIR/X86/mov-inc-dec-to-lea.mir b/llvm/test/CodeGen/MIR/X86/mov-inc-dec-to-lea.mir
new file mode 100644
index 0000000000000..df901fc9874d1
--- /dev/null
+++ b/llvm/test/CodeGen/MIR/X86/mov-inc-dec-to-lea.mir
@@ -0,0 +1,78 @@
+# RUN: llc -mtriple=x86_64-unknown-linux-gnu -run-pass=x86-fixup-leas %s -o - | FileCheck %s
+
+---
+name: mov_inc_fold
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $rcx
+    $eax = MOV32rr $ecx
+    $eax = INC32r killed $eax, implicit-def dead $eflags
+    ; CHECK: $eax = LEA64_32r $rcx, 1, $noreg, 1, $noreg
+    RET64 $rax
+...
+---
+name: mov_dec_fold
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $rcx
+    $eax = MOV32rr $ecx
+    $eax = DEC32r killed $eax, implicit-def dead $eflags
+    ; CHECK: $eax = LEA64_32r $rcx, 1, $noreg, -1, $noreg
+    RET64 $rax
+...
+---
+name: mov_inc_flags_live
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $rcx
+    $eax = MOV32rr $ecx, implicit-def $rax
+    $eax = INC32r $eax, implicit-def $eflags, implicit killed $rax, implicit-def $rax
+    $rax = CMOV64rr killed $rax, killed $rcx, 5, implicit killed $eflags
+    ; CHECK-NOT: LEA
+    ; CHECK: INC32r
+    RET64 $rax
+...
+---
+name: mov_inc_not_adjacent
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $rcx
+    $eax = MOV32rr $ecx, implicit-def $rax
+    $rcx = SHL64ri killed $rcx, 32, implicit-def dead $eflags
+    $eax = INC32r killed $eax, implicit-def dead $eflags, implicit killed $rax, implicit-def $rax
+    $rax = OR64rr killed $rax, killed $rcx, implicit-def dead $eflags
+    ; CHECK-NOT: LEA
+    ; CHECK: INC32r
+    RET64 $rax
+...
+---
+name: mov_dec_flags_live
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $rcx
+    $eax = MOV32rr $ecx, implicit-def $rax
+    $eax = DEC32r $eax, implicit-def $eflags, implicit killed $rax, implicit-def $rax
+    $rax = CMOV64rr killed $rax, killed $rcx, 5, implicit killed $eflags
+    ; CHECK-NOT: LEA
+    ; CHECK: DEC32r
+    RET64 $rax
+...
+---
+name: mov_dec_not_adjacent
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $rcx
+    $eax = MOV32rr $ecx, implicit-def $rax
+    $rcx = SHL64ri killed $rcx, 32, implicit-def dead $eflags
+    $eax = DEC32r killed $eax, implicit-def dead $eflags, implicit killed $rax, implicit-def $rax
+    $rax = OR64rr killed $rax, killed $rcx, implicit-def dead $eflags
+    ; CHECK-NOT: LEA
+    ; CHECK: DEC32r
+    RET64 $rax
+...
diff --git a/llvm/test/CodeGen/X86/mov-inc-dec-to-lea.ll b/llvm/test/CodeGen/X86/mov-inc-dec-to-lea.ll
index 6fcc2371f2024..0f861d646584b 100644
--- a/llvm/test/CodeGen/X86/mov-inc-dec-to-lea.ll
+++ b/llvm/test/CodeGen/X86/mov-inc-dec-to-lea.ll
@@ -2,7 +2,7 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2,+slow-3ops-lea | FileCheck %s
 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2,-slow-3ops-lea | FileCheck %s
 
-define i64 @mov_dec(<32 x i8> %x) local_unnamed_addr {
+define i64 @mov_dec(<32 x i8> %x) {
 ; CHECK-LABEL: mov_dec:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vpmovmskb %ymm0, %ecx
@@ -21,7 +21,7 @@ define i64 @mov_dec(<32 x i8> %x) local_unnamed_addr {
   ret i64 %res
 }
 
-define i64 @mov_inc(<32 x i8> %x) local_unnamed_addr {
+define i64 @mov_inc(<32 x i8> %x) {
 ; CHECK-LABEL: mov_inc:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vpmovmskb %ymm0, %ecx
@@ -39,63 +39,3 @@ define i64 @mov_inc(<32 x i8> %x) local_unnamed_addr {
   %res = or disjoint i64 %shl, %inc.ext
   ret i64 %res
 }
-
-define i64 @mov_inc_flags_live(<32 x i8> %x) local_unnamed_addr {
-; CHECK-LABEL: mov_inc_flags_live:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vpmovmskb %ymm0, %ecx
-; CHECK-NEXT:    movl %ecx, %eax
-; CHECK-NEXT:    incl %eax
-; CHECK-NEXT:    cmovneq %rcx, %rax
-; CHECK-NEXT:    vzeroupper
-; CHECK-NEXT:    retq
-  %cmp = icmp slt <32 x i8> %x, zeroinitializer
-  %mvmsk = bitcast <32 x i1> %cmp to i32
-  %inc = add i32 %mvmsk, 1
-  %iszero = icmp eq i32 %inc, 0
-  %ext = zext i32 %mvmsk to i64
-  %inc.ext = zext i32 %inc to i64
-  %sel = select i1 %iszero, i64 %inc.ext, i64 %ext
-  ret i64 %sel
-}
-
-define i64 @mov_dec_flags_live(<32 x i8> %x) local_unnamed_addr {
-; CHECK-LABEL: mov_dec_flags_live:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vpmovmskb %ymm0, %ecx
-; CHECK-NEXT:    movl %ecx, %eax
-; CHECK-NEXT:    decl %eax
-; CHECK-NEXT:    cmovneq %rcx, %rax
-; CHECK-NEXT:    vzeroupper
-; CHECK-NEXT:    retq
-  %cmp = icmp slt <32 x i8> %x, zeroinitializer
-  %mvmsk = bitcast <32 x i1> %cmp to i32
-  %dec = add i32 %mvmsk, -1
-  %iszero = icmp eq i32 %dec, 0
-  %ext = zext i32 %mvmsk to i64
-  %dec.ext = zext i32 %dec to i64
-  %sel = select i1 %iszero, i64 %dec.ext, i64 %ext
-  ret i64 %sel
-}
-
-define i64 @mov_inc_not_adjacent(<32 x i8> %x) local_unnamed_addr {
-; CHECK-LABEL: mov_inc_not_adjacent:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vpmovmskb %ymm0, %eax
-; CHECK-NEXT:    movq %rax, %rcx
-; CHECK-NEXT:    shlq $32, %rcx
-; CHECK-NEXT:    incl %eax
-; CHECK-NEXT:    orq %rcx, %rax
-; CHECK-NEXT:    xorq $5, %rax
-; CHECK-NEXT:    vzeroupper
-; CHECK-NEXT:    retq
-  %cmp = icmp slt <32 x i8> %x, zeroinitializer
-  %mvmsk = bitcast <32 x i1> %cmp to i32
-  %ext = zext i32 %mvmsk to i64
-  %shl = shl nuw i64 %ext, 32
-  %tmp = xor i64 %shl, 5
-  %inc = add i32 %mvmsk, 1
-  %inc.ext = zext i32 %inc to i64
-  %res = xor i64 %tmp, %inc.ext
-  ret i64 %res
-}



More information about the llvm-commits mailing list