[llvm] fold mov dec/inc to lea +- 1 (PR #185194)
Takashi Idobe via llvm-commits
llvm-commits at lists.llvm.org
Sun Mar 8 11:12:51 PDT 2026
https://github.com/Takashiidobe updated https://github.com/llvm/llvm-project/pull/185194
>From 05c6bc13b71ad890fb10d435a3007358f8355a88 Mon Sep 17 00:00:00 2001
From: Takashiidobe <idobetakashi at gmail.com>
Date: Sat, 7 Mar 2026 09:09:35 -0500
Subject: [PATCH 1/4] fuse mov + inc/dec pattern into just lea reg [reg +- 1]
---
llvm/lib/Target/X86/X86FixupLEAs.cpp | 90 ++++++++++++++++++++++++++++
1 file changed, 90 insertions(+)
diff --git a/llvm/lib/Target/X86/X86FixupLEAs.cpp b/llvm/lib/Target/X86/X86FixupLEAs.cpp
index 07f656fc5ccfd..2837968b819cc 100644
--- a/llvm/lib/Target/X86/X86FixupLEAs.cpp
+++ b/llvm/lib/Target/X86/X86FixupLEAs.cpp
@@ -12,6 +12,7 @@
//
//===----------------------------------------------------------------------===//
+#include "MCTargetDesc/X86BaseInfo.h"
#include "X86.h"
#include "X86InstrInfo.h"
#include "X86Subtarget.h"
@@ -120,6 +121,13 @@ class FixupLEAsImpl {
MachineInstr *postRAConvertToLEA(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI) const;
+ /// Fold adjacent mov/inc-dec into a single LEA:
+ /// mov dst, src
+ /// dec/inc dst (flags dead)
+ /// =>
+ /// lea dst, [src +/- 1]
+ bool foldMovIncDecToLEA(MachineBasicBlock &MBB, const X86Subtarget &ST) const;
+
public:
FixupLEAsImpl(ProfileSummaryInfo *PSI, MachineBlockFrequencyInfo *MBFI)
: PSI(PSI), MBFI(MBFI) {}
@@ -229,6 +237,16 @@ static bool isLEA(unsigned Opcode) {
Opcode == X86::LEA64_32r;
}
+/// Return the closest preceding non-debug instruction before \p I in \p MBB,
+/// or MBB.end() if every instruction before \p I is a debug instruction (or
+/// \p I is already at the start of the block).
+static MachineBasicBlock::iterator
+getPrevNonDebugInstr(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) {
+  while (I != MBB.begin()) {
+    --I;
+    if (!I->isDebugInstr())
+      return I;
+  }
+  return MBB.end();
+}
+
bool FixupLEAsImpl::runOnMachineFunction(MachineFunction &MF) {
const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
bool IsSlowLEA = ST.slowLEA();
@@ -244,6 +262,8 @@ bool FixupLEAsImpl::runOnMachineFunction(MachineFunction &MF) {
LLVM_DEBUG(dbgs() << "Start X86FixupLEAs\n";);
for (MachineBasicBlock &MBB : MF) {
+ foldMovIncDecToLEA(MBB, ST);
+
// First pass. Try to remove or optimize existing LEAs.
bool OptIncDecPerBB =
OptIncDec || llvm::shouldOptimizeForSize(&MBB, PSI, MBFI);
@@ -273,6 +293,76 @@ bool FixupLEAsImpl::runOnMachineFunction(MachineFunction &MF) {
return true;
}
+bool FixupLEAsImpl::foldMovIncDecToLEA(MachineBasicBlock &MBB,
+                                       const X86Subtarget &ST) const {
+  bool Changed = false;
+
+  for (auto I = MBB.begin(); I != MBB.end();) {
+    MachineInstr &MI = *I;
+    unsigned Opc = MI.getOpcode();
+
+    bool IsDec = Opc == X86::DEC32r || Opc == X86::DEC64r;
+    bool IsInc = Opc == X86::INC32r || Opc == X86::INC64r;
+    bool Is64BitIncDec = Opc == X86::DEC64r || Opc == X86::INC64r;
+    if (!IsDec && !IsInc) {
+      ++I;
+      continue;
+    }
+
+    // The INC/DEC must carry exactly its explicit dst/src pair plus the
+    // implicit EFLAGS def (3 operands), and EFLAGS must be dead since LEA
+    // does not write flags. Any extra implicit operand (e.g. an
+    // implicit-def of a super-register) would be dropped by the fold.
+    if (MI.getNumOperands() != 3 || !MI.registerDefIsDead(X86::EFLAGS, TRI)) {
+      ++I;
+      continue;
+    }
+
+    Register DstReg = MI.getOperand(0).getReg();
+    if (!DstReg.isPhysical() || MI.getOperand(1).getReg() != DstReg) {
+      ++I;
+      continue;
+    }
+
+    auto Prev = getPrevNonDebugInstr(MBB, I);
+    if (Prev == MBB.end()) {
+      ++I;
+      continue;
+    }
+
+    // Match a plain register-to-register move of matching width writing the
+    // same destination. Reject moves with extra implicit operands for the
+    // same reason as above, and moves of an undef source.
+    unsigned MovOpc = Is64BitIncDec ? X86::MOV64rr : X86::MOV32rr;
+    if (Prev->getOpcode() != MovOpc || Prev->getNumOperands() != 2 ||
+        Prev->getOperand(0).getReg() != DstReg ||
+        Prev->getOperand(1).isUndef()) {
+      ++I;
+      continue;
+    }
+
+    Register SrcReg = Prev->getOperand(1).getReg();
+    if (!SrcReg.isPhysical()) {
+      ++I;
+      continue;
+    }
+
+    // getPrevNonDebugInstr may have skipped DBG_VALUEs between the mov and
+    // the inc/dec. A DBG_VALUE of DstReg there describes the pre-inc/dec
+    // value, which no single LEA placed after it can materialize; folding
+    // would make the debug info wrong, so bail out in that case.
+    bool DbgUsesDst = false;
+    for (auto DI = std::next(Prev); DI != I; ++DI) {
+      if (DI->isDebugInstr() && DI->readsRegister(DstReg, TRI)) {
+        DbgUsesDst = true;
+        break;
+      }
+    }
+    if (DbgUsesDst) {
+      ++I;
+      continue;
+    }
+
+    Register LEASrcReg = SrcReg;
+    unsigned LEAOpc = X86::LEA32r;
+    if (Is64BitIncDec) {
+      LEAOpc = X86::LEA64r;
+    } else if (ST.is64Bit()) {
+      // In 64-bit mode a 32-bit-destination LEA takes 64-bit address
+      // operands, so read the 64-bit super-register; only the low 32 bits
+      // of the source affect the truncated result.
+      LEAOpc = X86::LEA64_32r;
+      LEASrcReg = getX86SubSuperRegister(SrcReg, 64);
+    }
+
+    // lea DstReg, [LEASrcReg + (IsDec ? -1 : 1)]
+    MachineInstr *NewMI =
+        BuildMI(MBB, I, MI.getDebugLoc(), TII->get(LEAOpc), DstReg)
+            .addReg(LEASrcReg)
+            .addImm(1)
+            .addReg(0)
+            .addImm(IsDec ? -1 : 1)
+            .addReg(0);
+
+    ++NumLEAs;
+    Changed = true;
+    // Re-point debug users of both folded instructions at the new LEA.
+    MBB.getParent()->substituteDebugValuesForInst(MI, *NewMI, 1);
+    MBB.getParent()->substituteDebugValuesForInst(*Prev, *NewMI, 1);
+
+    auto EraseIncDec = I++;
+    MBB.erase(EraseIncDec);
+    MBB.erase(Prev);
+  }
+
+  return Changed;
+}
+
FixupLEAsImpl::RegUsageState
FixupLEAsImpl::usesRegister(MachineOperand &p, MachineBasicBlock::iterator I) {
RegUsageState RegUsage = RU_NotUsed;
>From fd0e32fef9ec7f14e35d6027eba68898559f1ca3 Mon Sep 17 00:00:00 2001
From: Takashiidobe <idobetakashi at gmail.com>
Date: Sat, 7 Mar 2026 09:19:24 -0500
Subject: [PATCH 2/4] Update test checks for mov+inc/dec to lea fold
---
.../CodeGen/X86/AMX/amx-ldtilecfg-insert.ll | 3 +-
llvm/test/CodeGen/X86/fold-loop-of-urem.ll | 6 +-
llvm/test/CodeGen/X86/mov-inc-dec-to-lea.ll | 100 ++++++++++++++++++
llvm/test/CodeGen/X86/pr44412.ll | 6 +-
4 files changed, 105 insertions(+), 10 deletions(-)
create mode 100644 llvm/test/CodeGen/X86/mov-inc-dec-to-lea.ll
diff --git a/llvm/test/CodeGen/X86/AMX/amx-ldtilecfg-insert.ll b/llvm/test/CodeGen/X86/AMX/amx-ldtilecfg-insert.ll
index 8a8e7a3b4df2c..abff79a5b232d 100644
--- a/llvm/test/CodeGen/X86/AMX/amx-ldtilecfg-insert.ll
+++ b/llvm/test/CodeGen/X86/AMX/amx-ldtilecfg-insert.ll
@@ -238,8 +238,7 @@ define dso_local void @test5(i16 signext %0, i16 signext %1) nounwind {
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: movl $buf, %ecx
; CHECK-NEXT: movl $32, %edx
-; CHECK-NEXT: movl %esi, %r8d
-; CHECK-NEXT: decl %r8d
+; CHECK-NEXT: leal -1(%rsi), %r8d
; CHECK-NEXT: jmp .LBB4_1
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB4_3: # %if.false
diff --git a/llvm/test/CodeGen/X86/fold-loop-of-urem.ll b/llvm/test/CodeGen/X86/fold-loop-of-urem.ll
index cb1c078ee5129..37cb39dac265e 100644
--- a/llvm/test/CodeGen/X86/fold-loop-of-urem.ll
+++ b/llvm/test/CodeGen/X86/fold-loop-of-urem.ll
@@ -389,8 +389,7 @@ define void @simple_urem_fail_bad_incr3(i32 %N, i32 %rem_amt) nounwind {
; CHECK-NEXT: testb $1, %r14b
; CHECK-NEXT: je .LBB5_7
; CHECK-NEXT: # %bb.4: # in Loop: Header=BB5_2 Depth=1
-; CHECK-NEXT: movl %eax, %ebp
-; CHECK-NEXT: incl %ebp
+; CHECK-NEXT: leal 1(%rax), %ebp
; CHECK-NEXT: jmp .LBB5_6
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB5_5: # %for.body2
@@ -901,8 +900,7 @@ define void @simple_urem_multi_latch_non_canonical(i32 %N, i32 %rem_amt) nounwin
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: movl %esi, %ebx
-; CHECK-NEXT: movl %edi, %ebp
-; CHECK-NEXT: decl %ebp
+; CHECK-NEXT: leal -1(%rdi), %ebp
; CHECK-NEXT: xorl %r12d, %r12d
; CHECK-NEXT: xorl %r14d, %r14d
; CHECK-NEXT: xorl %r13d, %r13d
diff --git a/llvm/test/CodeGen/X86/mov-inc-dec-to-lea.ll b/llvm/test/CodeGen/X86/mov-inc-dec-to-lea.ll
new file mode 100644
index 0000000000000..4690fbd0cb5c3
--- /dev/null
+++ b/llvm/test/CodeGen/X86/mov-inc-dec-to-lea.ll
@@ -0,0 +1,100 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 | FileCheck %s
+
+define i64 @mov_dec(<32 x i8> %x) local_unnamed_addr {
+; CHECK-LABEL: mov_dec:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vpmovmskb %ymm0, %ecx
+; CHECK-NEXT: leal -1(%rcx), %eax
+; CHECK-NEXT: shlq $32, %rcx
+; CHECK-NEXT: orq %rcx, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+ %cmp = icmp slt <32 x i8> %x, zeroinitializer
+ %mvmsk = bitcast <32 x i1> %cmp to i32
+ %dec = add i32 %mvmsk, -1
+ %ext = zext i32 %mvmsk to i64
+ %shl = shl nuw i64 %ext, 32
+ %dec.ext = zext i32 %dec to i64
+ %res = or disjoint i64 %shl, %dec.ext
+ ret i64 %res
+}
+
+define i64 @mov_inc(<32 x i8> %x) local_unnamed_addr {
+; CHECK-LABEL: mov_inc:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vpmovmskb %ymm0, %ecx
+; CHECK-NEXT: leal 1(%rcx), %eax
+; CHECK-NEXT: shlq $32, %rcx
+; CHECK-NEXT: orq %rcx, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+ %cmp = icmp slt <32 x i8> %x, zeroinitializer
+ %mvmsk = bitcast <32 x i1> %cmp to i32
+ %inc = add i32 %mvmsk, 1
+ %ext = zext i32 %mvmsk to i64
+ %shl = shl nuw i64 %ext, 32
+ %inc.ext = zext i32 %inc to i64
+ %res = or disjoint i64 %shl, %inc.ext
+ ret i64 %res
+}
+
+define i64 @mov_inc_flags_live(<32 x i8> %x) local_unnamed_addr {
+; CHECK-LABEL: mov_inc_flags_live:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vpmovmskb %ymm0, %ecx
+; CHECK-NEXT: movl %ecx, %eax
+; CHECK-NEXT: incl %eax
+; CHECK-NEXT: cmovneq %rcx, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+ %cmp = icmp slt <32 x i8> %x, zeroinitializer
+ %mvmsk = bitcast <32 x i1> %cmp to i32
+ %inc = add i32 %mvmsk, 1
+ %iszero = icmp eq i32 %inc, 0
+ %ext = zext i32 %mvmsk to i64
+ %inc.ext = zext i32 %inc to i64
+ %sel = select i1 %iszero, i64 %inc.ext, i64 %ext
+ ret i64 %sel
+}
+
+define i64 @mov_dec_flags_live(<32 x i8> %x) local_unnamed_addr {
+; CHECK-LABEL: mov_dec_flags_live:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vpmovmskb %ymm0, %ecx
+; CHECK-NEXT: movl %ecx, %eax
+; CHECK-NEXT: decl %eax
+; CHECK-NEXT: cmovneq %rcx, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+ %cmp = icmp slt <32 x i8> %x, zeroinitializer
+ %mvmsk = bitcast <32 x i1> %cmp to i32
+ %dec = add i32 %mvmsk, -1
+ %iszero = icmp eq i32 %dec, 0
+ %ext = zext i32 %mvmsk to i64
+ %dec.ext = zext i32 %dec to i64
+ %sel = select i1 %iszero, i64 %dec.ext, i64 %ext
+ ret i64 %sel
+}
+
+define i64 @mov_inc_not_adjacent(<32 x i8> %x) local_unnamed_addr {
+; CHECK-LABEL: mov_inc_not_adjacent:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vpmovmskb %ymm0, %eax
+; CHECK-NEXT: movq %rax, %rcx
+; CHECK-NEXT: shlq $32, %rcx
+; CHECK-NEXT: incl %eax
+; CHECK-NEXT: orq %rcx, %rax
+; CHECK-NEXT: xorq $5, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+ %cmp = icmp slt <32 x i8> %x, zeroinitializer
+ %mvmsk = bitcast <32 x i1> %cmp to i32
+ %ext = zext i32 %mvmsk to i64
+ %shl = shl nuw i64 %ext, 32
+ %tmp = xor i64 %shl, 5
+ %inc = add i32 %mvmsk, 1
+ %inc.ext = zext i32 %inc to i64
+ %res = xor i64 %tmp, %inc.ext
+ ret i64 %res
+}
diff --git a/llvm/test/CodeGen/X86/pr44412.ll b/llvm/test/CodeGen/X86/pr44412.ll
index 546dbcc156129..331d9f0e7d45e 100644
--- a/llvm/test/CodeGen/X86/pr44412.ll
+++ b/llvm/test/CodeGen/X86/pr44412.ll
@@ -8,8 +8,7 @@ define void @bar(i32 %0, i32 %1) nounwind {
; CHECK-NEXT: je .LBB0_4
; CHECK-NEXT: # %bb.1: # %.preheader
; CHECK-NEXT: pushq %rbx
-; CHECK-NEXT: movl %edi, %ebx
-; CHECK-NEXT: decl %ebx
+; CHECK-NEXT: leal -1(%rdi), %ebx
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB0_2: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: movl %ebx, %edi
@@ -41,8 +40,7 @@ define void @baz(i32 %0, i32 %1) nounwind {
; CHECK-NEXT: je .LBB1_4
; CHECK-NEXT: # %bb.1: # %.preheader
; CHECK-NEXT: pushq %rbx
-; CHECK-NEXT: movl %edi, %ebx
-; CHECK-NEXT: decl %ebx
+; CHECK-NEXT: leal -1(%rdi), %ebx
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB1_2: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: movl %ebx, %edi
>From 0c25eb25122725fff4bbc22f0a0d2390ffabea76 Mon Sep 17 00:00:00 2001
From: Takashiidobe <idobetakashi at gmail.com>
Date: Sun, 8 Mar 2026 09:39:24 -0400
Subject: [PATCH 3/4] add +slow-3ops-lea and -slow-3ops-lea to check if the
generated code is any different to test file
---
llvm/test/CodeGen/X86/mov-inc-dec-to-lea.ll | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/llvm/test/CodeGen/X86/mov-inc-dec-to-lea.ll b/llvm/test/CodeGen/X86/mov-inc-dec-to-lea.ll
index 4690fbd0cb5c3..6fcc2371f2024 100644
--- a/llvm/test/CodeGen/X86/mov-inc-dec-to-lea.ll
+++ b/llvm/test/CodeGen/X86/mov-inc-dec-to-lea.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
-; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2,+slow-3ops-lea | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2,-slow-3ops-lea | FileCheck %s
define i64 @mov_dec(<32 x i8> %x) local_unnamed_addr {
; CHECK-LABEL: mov_dec:
>From d4b76999e5ca7793c7d56941978521919710ef81 Mon Sep 17 00:00:00 2001
From: Takashiidobe <idobetakashi at gmail.com>
Date: Sun, 8 Mar 2026 14:12:40 -0400
Subject: [PATCH 4/4] remove the negative tests from asm generation and move
those tests to MIR generation while only running fix up leas pass
---
.../CodeGen/MIR/X86/mov-inc-dec-to-lea.mir | 78 +++++++++++++++++++
llvm/test/CodeGen/X86/mov-inc-dec-to-lea.ll | 64 +--------------
2 files changed, 80 insertions(+), 62 deletions(-)
create mode 100644 llvm/test/CodeGen/MIR/X86/mov-inc-dec-to-lea.mir
diff --git a/llvm/test/CodeGen/MIR/X86/mov-inc-dec-to-lea.mir b/llvm/test/CodeGen/MIR/X86/mov-inc-dec-to-lea.mir
new file mode 100644
index 0000000000000..df901fc9874d1
--- /dev/null
+++ b/llvm/test/CodeGen/MIR/X86/mov-inc-dec-to-lea.mir
@@ -0,0 +1,78 @@
+# RUN: llc -mtriple=x86_64-unknown-linux-gnu -run-pass=x86-fixup-LEAs %s -o - | FileCheck %s
+
+---
+name: mov_inc_fold
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $rcx
+ $eax = MOV32rr $ecx
+ $eax = INC32r killed $eax, implicit-def dead $eflags
+ ; CHECK: $eax = LEA64_32r $rcx, 1, $noreg, 1, $noreg
+ RET64 $rax
+...
+---
+name: mov_dec_fold
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $rcx
+ $eax = MOV32rr $ecx
+ $eax = DEC32r killed $eax, implicit-def dead $eflags
+ ; CHECK: $eax = LEA64_32r $rcx, 1, $noreg, -1, $noreg
+ RET64 $rax
+...
+---
+name: mov_inc_flags_live
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $rcx
+ $eax = MOV32rr $ecx, implicit-def $rax
+ $eax = INC32r $eax, implicit-def $eflags, implicit killed $rax, implicit-def $rax
+ $rax = CMOV64rr killed $rax, killed $rcx, 5, implicit killed $eflags
+ ; CHECK-NOT: LEA
+ ; CHECK: INC32r
+ RET64 $rax
+...
+---
+name: mov_inc_not_adjacent
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $rcx
+ $eax = MOV32rr $ecx, implicit-def $rax
+ $rcx = SHL64ri killed $rcx, 32, implicit-def dead $eflags
+ $eax = INC32r killed $eax, implicit-def dead $eflags, implicit killed $rax, implicit-def $rax
+ $rax = OR64rr killed $rax, killed $rcx, implicit-def dead $eflags
+ ; CHECK-NOT: LEA
+ ; CHECK: INC32r
+ RET64 $rax
+...
+---
+name: mov_dec_flags_live
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $rcx
+ $eax = MOV32rr $ecx, implicit-def $rax
+ $eax = DEC32r $eax, implicit-def $eflags, implicit killed $rax, implicit-def $rax
+ $rax = CMOV64rr killed $rax, killed $rcx, 5, implicit killed $eflags
+ ; CHECK-NOT: LEA
+ ; CHECK: DEC32r
+ RET64 $rax
+...
+---
+name: mov_dec_not_adjacent
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $rcx
+ $eax = MOV32rr $ecx, implicit-def $rax
+ $rcx = SHL64ri killed $rcx, 32, implicit-def dead $eflags
+ $eax = DEC32r killed $eax, implicit-def dead $eflags, implicit killed $rax, implicit-def $rax
+ $rax = OR64rr killed $rax, killed $rcx, implicit-def dead $eflags
+ ; CHECK-NOT: LEA
+ ; CHECK: DEC32r
+ RET64 $rax
+...
diff --git a/llvm/test/CodeGen/X86/mov-inc-dec-to-lea.ll b/llvm/test/CodeGen/X86/mov-inc-dec-to-lea.ll
index 6fcc2371f2024..0f861d646584b 100644
--- a/llvm/test/CodeGen/X86/mov-inc-dec-to-lea.ll
+++ b/llvm/test/CodeGen/X86/mov-inc-dec-to-lea.ll
@@ -2,7 +2,7 @@
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2,+slow-3ops-lea | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2,-slow-3ops-lea | FileCheck %s
-define i64 @mov_dec(<32 x i8> %x) local_unnamed_addr {
+define i64 @mov_dec(<32 x i8> %x) {
; CHECK-LABEL: mov_dec:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmovmskb %ymm0, %ecx
@@ -21,7 +21,7 @@ define i64 @mov_dec(<32 x i8> %x) local_unnamed_addr {
ret i64 %res
}
-define i64 @mov_inc(<32 x i8> %x) local_unnamed_addr {
+define i64 @mov_inc(<32 x i8> %x) {
; CHECK-LABEL: mov_inc:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmovmskb %ymm0, %ecx
@@ -39,63 +39,3 @@ define i64 @mov_inc(<32 x i8> %x) local_unnamed_addr {
%res = or disjoint i64 %shl, %inc.ext
ret i64 %res
}
-
-define i64 @mov_inc_flags_live(<32 x i8> %x) local_unnamed_addr {
-; CHECK-LABEL: mov_inc_flags_live:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vpmovmskb %ymm0, %ecx
-; CHECK-NEXT: movl %ecx, %eax
-; CHECK-NEXT: incl %eax
-; CHECK-NEXT: cmovneq %rcx, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
- %cmp = icmp slt <32 x i8> %x, zeroinitializer
- %mvmsk = bitcast <32 x i1> %cmp to i32
- %inc = add i32 %mvmsk, 1
- %iszero = icmp eq i32 %inc, 0
- %ext = zext i32 %mvmsk to i64
- %inc.ext = zext i32 %inc to i64
- %sel = select i1 %iszero, i64 %inc.ext, i64 %ext
- ret i64 %sel
-}
-
-define i64 @mov_dec_flags_live(<32 x i8> %x) local_unnamed_addr {
-; CHECK-LABEL: mov_dec_flags_live:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vpmovmskb %ymm0, %ecx
-; CHECK-NEXT: movl %ecx, %eax
-; CHECK-NEXT: decl %eax
-; CHECK-NEXT: cmovneq %rcx, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
- %cmp = icmp slt <32 x i8> %x, zeroinitializer
- %mvmsk = bitcast <32 x i1> %cmp to i32
- %dec = add i32 %mvmsk, -1
- %iszero = icmp eq i32 %dec, 0
- %ext = zext i32 %mvmsk to i64
- %dec.ext = zext i32 %dec to i64
- %sel = select i1 %iszero, i64 %dec.ext, i64 %ext
- ret i64 %sel
-}
-
-define i64 @mov_inc_not_adjacent(<32 x i8> %x) local_unnamed_addr {
-; CHECK-LABEL: mov_inc_not_adjacent:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vpmovmskb %ymm0, %eax
-; CHECK-NEXT: movq %rax, %rcx
-; CHECK-NEXT: shlq $32, %rcx
-; CHECK-NEXT: incl %eax
-; CHECK-NEXT: orq %rcx, %rax
-; CHECK-NEXT: xorq $5, %rax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
- %cmp = icmp slt <32 x i8> %x, zeroinitializer
- %mvmsk = bitcast <32 x i1> %cmp to i32
- %ext = zext i32 %mvmsk to i64
- %shl = shl nuw i64 %ext, 32
- %tmp = xor i64 %shl, 5
- %inc = add i32 %mvmsk, 1
- %inc.ext = zext i32 %inc to i64
- %res = xor i64 %tmp, %inc.ext
- ret i64 %res
-}
More information about the llvm-commits
mailing list