[llvm] [X86] Distribute Certain Bitwise Operations over SELECT (PR #136555)
Marius Kamp via llvm-commits
llvm-commits at lists.llvm.org
Thu May 29 07:36:51 PDT 2025
https://github.com/mskamp updated https://github.com/llvm/llvm-project/pull/136555
From 04365dc42c4c84bf592313c93a6b5b8345ede19f Mon Sep 17 00:00:00 2001
From: Marius Kamp <msk at posteo.org>
Date: Sun, 30 Mar 2025 07:45:47 +0200
Subject: [PATCH 1/2] [X86] Add Tests for Distributing AND/XOR over SELECT; NFC
---
llvm/test/CodeGen/X86/bmi-select-distrib.ll | 915 ++++++++++++++++++++
1 file changed, 915 insertions(+)
create mode 100644 llvm/test/CodeGen/X86/bmi-select-distrib.ll
diff --git a/llvm/test/CodeGen/X86/bmi-select-distrib.ll b/llvm/test/CodeGen/X86/bmi-select-distrib.ll
new file mode 100644
index 0000000000000..1147561e22d06
--- /dev/null
+++ b/llvm/test/CodeGen/X86/bmi-select-distrib.ll
@@ -0,0 +1,915 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+cmov,+sse2,+bmi,+bmi2 | FileCheck %s --check-prefixes=X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+bmi2 | FileCheck %s --check-prefixes=X64
+
+define i32 @and_select_neg_to_blsi1(i1 %a0, i32 inreg %a1) nounwind {
+; X86-LABEL: and_select_neg_to_blsi1:
+; X86: # %bb.0:
+; X86-NEXT: pushl %esi
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: andb $1, %cl
+; X86-NEXT: movl %eax, %edx
+; X86-NEXT: negl %edx
+; X86-NEXT: xorl %esi, %esi
+; X86-NEXT: cmpb $1, %cl
+; X86-NEXT: sbbl %esi, %esi
+; X86-NEXT: orl %edx, %esi
+; X86-NEXT: andl %esi, %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+;
+; X64-LABEL: and_select_neg_to_blsi1:
+; X64: # %bb.0:
+; X64-NEXT: andb $1, %dil
+; X64-NEXT: movl %esi, %ecx
+; X64-NEXT: negl %ecx
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: cmpb $1, %dil
+; X64-NEXT: sbbl %eax, %eax
+; X64-NEXT: orl %ecx, %eax
+; X64-NEXT: andl %esi, %eax
+; X64-NEXT: retq
+ %sub = sub i32 0, %a1
+ %bls = select i1 %a0, i32 %sub, i32 -1
+ %ret = and i32 %a1, %bls
+ ret i32 %ret
+}
+
+define i32 @and_select_neg_to_blsi2(i1 %a0, i32 inreg %a1) nounwind {
+; X86-LABEL: and_select_neg_to_blsi2:
+; X86: # %bb.0:
+; X86-NEXT: pushl %esi
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: andb $1, %cl
+; X86-NEXT: movl %eax, %edx
+; X86-NEXT: negl %edx
+; X86-NEXT: xorl %esi, %esi
+; X86-NEXT: cmpb $1, %cl
+; X86-NEXT: sbbl %esi, %esi
+; X86-NEXT: orl %edx, %esi
+; X86-NEXT: andl %esi, %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+;
+; X64-LABEL: and_select_neg_to_blsi2:
+; X64: # %bb.0:
+; X64-NEXT: andb $1, %dil
+; X64-NEXT: movl %esi, %ecx
+; X64-NEXT: negl %ecx
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: cmpb $1, %dil
+; X64-NEXT: sbbl %eax, %eax
+; X64-NEXT: orl %ecx, %eax
+; X64-NEXT: andl %esi, %eax
+; X64-NEXT: retq
+ %sub = sub i32 0, %a1
+ %bls = select i1 %a0, i32 %sub, i32 -1
+ %ret = and i32 %bls, %a1
+ ret i32 %ret
+}
+
+define i32 @and_select_neg_to_blsi3(i1 %a0, i32 inreg %a1) nounwind {
+; X86-LABEL: and_select_neg_to_blsi3:
+; X86: # %bb.0:
+; X86-NEXT: pushl %esi
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: andb $1, %cl
+; X86-NEXT: movl %eax, %edx
+; X86-NEXT: negl %edx
+; X86-NEXT: xorl %esi, %esi
+; X86-NEXT: negb %cl
+; X86-NEXT: sbbl %esi, %esi
+; X86-NEXT: orl %edx, %esi
+; X86-NEXT: andl %esi, %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+;
+; X64-LABEL: and_select_neg_to_blsi3:
+; X64: # %bb.0:
+; X64-NEXT: andb $1, %dil
+; X64-NEXT: movl %esi, %ecx
+; X64-NEXT: negl %ecx
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: negb %dil
+; X64-NEXT: sbbl %eax, %eax
+; X64-NEXT: orl %ecx, %eax
+; X64-NEXT: andl %esi, %eax
+; X64-NEXT: retq
+ %sub = sub i32 0, %a1
+ %bls = select i1 %a0, i32 -1, i32 %sub
+ %ret = and i32 %a1, %bls
+ ret i32 %ret
+}
+
+define i64 @and_select_neg_to_blsi_i64(i1 %a0, i64 %a1) nounwind {
+; X86-LABEL: and_select_neg_to_blsi_i64:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: andb $1, %bl
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: movl %ecx, %edi
+; X86-NEXT: negl %edi
+; X86-NEXT: movl $0, %edx
+; X86-NEXT: sbbl %esi, %edx
+; X86-NEXT: cmpb $1, %bl
+; X86-NEXT: sbbl %eax, %eax
+; X86-NEXT: orl %eax, %edx
+; X86-NEXT: orl %edi, %eax
+; X86-NEXT: andl %esi, %edx
+; X86-NEXT: andl %ecx, %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: retl
+;
+; X64-LABEL: and_select_neg_to_blsi_i64:
+; X64: # %bb.0:
+; X64-NEXT: andb $1, %dil
+; X64-NEXT: movq %rsi, %rcx
+; X64-NEXT: negq %rcx
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: cmpb $1, %dil
+; X64-NEXT: sbbq %rax, %rax
+; X64-NEXT: orq %rcx, %rax
+; X64-NEXT: andq %rsi, %rax
+; X64-NEXT: retq
+ %sub = sub i64 0, %a1
+ %bls = select i1 %a0, i64 %sub, i64 -1
+ %ret = and i64 %a1, %bls
+ ret i64 %ret
+}
+
+; Negative test
+define i16 @and_select_neg_i16(i1 %a0, i16 %a1) nounwind {
+; X86-LABEL: and_select_neg_i16:
+; X86: # %bb.0:
+; X86-NEXT: pushl %esi
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: andb $1, %cl
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: negl %esi
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: cmpb $1, %cl
+; X86-NEXT: sbbl %eax, %eax
+; X86-NEXT: orl %esi, %eax
+; X86-NEXT: andl %edx, %eax
+; X86-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+;
+; X64-LABEL: and_select_neg_i16:
+; X64: # %bb.0:
+; X64-NEXT: andb $1, %dil
+; X64-NEXT: movl %esi, %ecx
+; X64-NEXT: negl %ecx
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: cmpb $1, %dil
+; X64-NEXT: sbbl %eax, %eax
+; X64-NEXT: orl %ecx, %eax
+; X64-NEXT: andl %esi, %eax
+; X64-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-NEXT: retq
+ %sub = sub i16 0, %a1
+ %bls = select i1 %a0, i16 %sub, i16 -1
+ %ret = and i16 %a1, %bls
+ ret i16 %ret
+}
+
+; Negative test
+define <4 x i32> @and_select_neg_v4xi32(i1 %a0, <4 x i32> %a1) nounwind {
+; X86-LABEL: and_select_neg_v4xi32:
+; X86: # %bb.0:
+; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
+; X86-NEXT: jne .LBB5_1
+; X86-NEXT: # %bb.2:
+; X86-NEXT: pcmpeqd %xmm1, %xmm1
+; X86-NEXT: pand %xmm1, %xmm0
+; X86-NEXT: retl
+; X86-NEXT: .LBB5_1:
+; X86-NEXT: pxor %xmm1, %xmm1
+; X86-NEXT: psubd %xmm0, %xmm1
+; X86-NEXT: pand %xmm1, %xmm0
+; X86-NEXT: retl
+;
+; X64-LABEL: and_select_neg_v4xi32:
+; X64: # %bb.0:
+; X64-NEXT: testb $1, %dil
+; X64-NEXT: jne .LBB5_1
+; X64-NEXT: # %bb.2:
+; X64-NEXT: pcmpeqd %xmm1, %xmm1
+; X64-NEXT: pand %xmm1, %xmm0
+; X64-NEXT: retq
+; X64-NEXT: .LBB5_1:
+; X64-NEXT: pxor %xmm1, %xmm1
+; X64-NEXT: psubd %xmm0, %xmm1
+; X64-NEXT: pand %xmm1, %xmm0
+; X64-NEXT: retq
+ %sub = sub <4 x i32> zeroinitializer, %a1
+ %bls = select i1 %a0, <4 x i32> %sub, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
+ %ret = and <4 x i32> %a1, %bls
+ ret <4 x i32> %ret
+}
+
+; Negative test
+define i32 @and_select_no_neg(i1 %a0, i32 inreg %a1) nounwind {
+; X86-LABEL: and_select_no_neg:
+; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: andb $1, %cl
+; X86-NEXT: xorl %edx, %edx
+; X86-NEXT: cmpb $1, %cl
+; X86-NEXT: sbbl %edx, %edx
+; X86-NEXT: orl %eax, %edx
+; X86-NEXT: andl %edx, %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: and_select_no_neg:
+; X64: # %bb.0:
+; X64-NEXT: andb $1, %dil
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: cmpb $1, %dil
+; X64-NEXT: sbbl %eax, %eax
+; X64-NEXT: orl %esi, %eax
+; X64-NEXT: andl %esi, %eax
+; X64-NEXT: retq
+ %sub = sub i32 %a1, 0
+ %bls = select i1 %a0, i32 %sub, i32 -1
+ %ret = and i32 %a1, %bls
+ ret i32 %ret
+}
+
+; Negative test
+define i32 @and_select_neg_wrong_const(i1 %a0, i32 inreg %a1) nounwind {
+; X86-LABEL: and_select_neg_wrong_const:
+; X86: # %bb.0:
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: negl %ecx
+; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $1, %edx
+; X86-NEXT: cmovnel %ecx, %edx
+; X86-NEXT: andl %edx, %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: and_select_neg_wrong_const:
+; X64: # %bb.0:
+; X64-NEXT: movl %esi, %ecx
+; X64-NEXT: negl %ecx
+; X64-NEXT: testb $1, %dil
+; X64-NEXT: movl $1, %eax
+; X64-NEXT: cmovnel %ecx, %eax
+; X64-NEXT: andl %esi, %eax
+; X64-NEXT: retq
+ %sub = sub i32 0, %a1
+ %bls = select i1 %a0, i32 %sub, i32 1
+ %ret = and i32 %a1, %bls
+ ret i32 %ret
+}
+
+; Negative test
+define i32 @and_select_neg_different_op(i1 %a0, i32 inreg %a1, i32 inreg %a2) nounwind {
+; X86-LABEL: and_select_neg_different_op:
+; X86: # %bb.0:
+; X86-NEXT: pushl %esi
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: andb $1, %cl
+; X86-NEXT: negl %edx
+; X86-NEXT: xorl %esi, %esi
+; X86-NEXT: cmpb $1, %cl
+; X86-NEXT: sbbl %esi, %esi
+; X86-NEXT: orl %edx, %esi
+; X86-NEXT: andl %esi, %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+;
+; X64-LABEL: and_select_neg_different_op:
+; X64: # %bb.0:
+; X64-NEXT: andb $1, %dil
+; X64-NEXT: negl %edx
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: cmpb $1, %dil
+; X64-NEXT: sbbl %eax, %eax
+; X64-NEXT: orl %edx, %eax
+; X64-NEXT: andl %esi, %eax
+; X64-NEXT: retq
+ %sub = sub i32 0, %a2
+ %bls = select i1 %a0, i32 %sub, i32 -1
+ %ret = and i32 %a1, %bls
+ ret i32 %ret
+}
+
+define i32 @and_select_sub_1_to_blsr1(i1 %a0, i32 inreg %a1) nounwind {
+; X86-LABEL: and_select_sub_1_to_blsr1:
+; X86: # %bb.0:
+; X86-NEXT: pushl %esi
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: andb $1, %cl
+; X86-NEXT: leal -1(%eax), %edx
+; X86-NEXT: xorl %esi, %esi
+; X86-NEXT: cmpb $1, %cl
+; X86-NEXT: sbbl %esi, %esi
+; X86-NEXT: orl %edx, %esi
+; X86-NEXT: andl %esi, %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+;
+; X64-LABEL: and_select_sub_1_to_blsr1:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $esi killed $esi def $rsi
+; X64-NEXT: andb $1, %dil
+; X64-NEXT: leal -1(%rsi), %ecx
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: cmpb $1, %dil
+; X64-NEXT: sbbl %eax, %eax
+; X64-NEXT: orl %ecx, %eax
+; X64-NEXT: andl %esi, %eax
+; X64-NEXT: retq
+ %sub = add i32 %a1, -1
+ %bls = select i1 %a0, i32 %sub, i32 -1
+ %ret = and i32 %a1, %bls
+ ret i32 %ret
+}
+
+define i32 @and_select_sub_1_to_blsr2(i1 %a0, i32 inreg %a1) nounwind {
+; X86-LABEL: and_select_sub_1_to_blsr2:
+; X86: # %bb.0:
+; X86-NEXT: pushl %esi
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: andb $1, %cl
+; X86-NEXT: leal -1(%eax), %edx
+; X86-NEXT: xorl %esi, %esi
+; X86-NEXT: cmpb $1, %cl
+; X86-NEXT: sbbl %esi, %esi
+; X86-NEXT: orl %edx, %esi
+; X86-NEXT: andl %esi, %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+;
+; X64-LABEL: and_select_sub_1_to_blsr2:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $esi killed $esi def $rsi
+; X64-NEXT: andb $1, %dil
+; X64-NEXT: leal -1(%rsi), %ecx
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: cmpb $1, %dil
+; X64-NEXT: sbbl %eax, %eax
+; X64-NEXT: orl %ecx, %eax
+; X64-NEXT: andl %esi, %eax
+; X64-NEXT: retq
+ %sub = add i32 %a1, -1
+ %bls = select i1 %a0, i32 %sub, i32 -1
+ %ret = and i32 %bls, %a1
+ ret i32 %ret
+}
+
+define i32 @and_select_sub_1_to_blsr3(i1 %a0, i32 inreg %a1) nounwind {
+; X86-LABEL: and_select_sub_1_to_blsr3:
+; X86: # %bb.0:
+; X86-NEXT: pushl %esi
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: andb $1, %cl
+; X86-NEXT: leal -1(%eax), %edx
+; X86-NEXT: xorl %esi, %esi
+; X86-NEXT: negb %cl
+; X86-NEXT: sbbl %esi, %esi
+; X86-NEXT: orl %edx, %esi
+; X86-NEXT: andl %esi, %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+;
+; X64-LABEL: and_select_sub_1_to_blsr3:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $esi killed $esi def $rsi
+; X64-NEXT: andb $1, %dil
+; X64-NEXT: leal -1(%rsi), %ecx
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: negb %dil
+; X64-NEXT: sbbl %eax, %eax
+; X64-NEXT: orl %ecx, %eax
+; X64-NEXT: andl %esi, %eax
+; X64-NEXT: retq
+ %sub = add i32 %a1, -1
+ %bls = select i1 %a0, i32 -1, i32 %sub
+ %ret = and i32 %a1, %bls
+ ret i32 %ret
+}
+
+define i32 @and_select_sub_1_to_blsr4(i1 %a0, i32 inreg %a1) nounwind {
+; X86-LABEL: and_select_sub_1_to_blsr4:
+; X86: # %bb.0:
+; X86-NEXT: pushl %esi
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: andb $1, %cl
+; X86-NEXT: leal -1(%eax), %edx
+; X86-NEXT: xorl %esi, %esi
+; X86-NEXT: cmpb $1, %cl
+; X86-NEXT: sbbl %esi, %esi
+; X86-NEXT: orl %edx, %esi
+; X86-NEXT: andl %esi, %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+;
+; X64-LABEL: and_select_sub_1_to_blsr4:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $esi killed $esi def $rsi
+; X64-NEXT: andb $1, %dil
+; X64-NEXT: leal -1(%rsi), %ecx
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: cmpb $1, %dil
+; X64-NEXT: sbbl %eax, %eax
+; X64-NEXT: orl %ecx, %eax
+; X64-NEXT: andl %esi, %eax
+; X64-NEXT: retq
+ %sub = sub i32 %a1, 1
+ %bls = select i1 %a0, i32 %sub, i32 -1
+ %ret = and i32 %a1, %bls
+ ret i32 %ret
+}
+
+define i64 @and_select_sub_1_to_blsr_i64(i1 %a0, i64 %a1) nounwind {
+; X86-LABEL: and_select_sub_1_to_blsr_i64:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: andb $1, %bl
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl %ecx, %edi
+; X86-NEXT: addl $-1, %edi
+; X86-NEXT: movl %esi, %edx
+; X86-NEXT: adcl $-1, %edx
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: cmpb $1, %bl
+; X86-NEXT: sbbl %eax, %eax
+; X86-NEXT: orl %eax, %edx
+; X86-NEXT: orl %edi, %eax
+; X86-NEXT: andl %esi, %edx
+; X86-NEXT: andl %ecx, %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: retl
+;
+; X64-LABEL: and_select_sub_1_to_blsr_i64:
+; X64: # %bb.0:
+; X64-NEXT: andb $1, %dil
+; X64-NEXT: leaq -1(%rsi), %rcx
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: cmpb $1, %dil
+; X64-NEXT: sbbq %rax, %rax
+; X64-NEXT: orq %rcx, %rax
+; X64-NEXT: andq %rsi, %rax
+; X64-NEXT: retq
+ %sub = add i64 %a1, -1
+ %bls = select i1 %a0, i64 %sub, i64 -1
+ %ret = and i64 %a1, %bls
+ ret i64 %ret
+}
+
+; Negative test
+define i16 @and_select_sub_1_i16(i1 %a0, i16 %a1) nounwind {
+; X86-LABEL: and_select_sub_1_i16:
+; X86: # %bb.0:
+; X86-NEXT: pushl %esi
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: andb $1, %cl
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: leal -1(%edx), %esi
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: cmpb $1, %cl
+; X86-NEXT: sbbl %eax, %eax
+; X86-NEXT: orl %esi, %eax
+; X86-NEXT: andl %edx, %eax
+; X86-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+;
+; X64-LABEL: and_select_sub_1_i16:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $esi killed $esi def $rsi
+; X64-NEXT: andb $1, %dil
+; X64-NEXT: leal -1(%rsi), %ecx
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: cmpb $1, %dil
+; X64-NEXT: sbbl %eax, %eax
+; X64-NEXT: orl %ecx, %eax
+; X64-NEXT: andl %esi, %eax
+; X64-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-NEXT: retq
+ %sub = add i16 %a1, -1
+ %bls = select i1 %a0, i16 %sub, i16 -1
+ %ret = and i16 %a1, %bls
+ ret i16 %ret
+}
+
+; Negative test
+define <4 x i32> @and_select_sub_1_v4xi32(i1 %a0, <4 x i32> %a1) nounwind {
+; X86-LABEL: and_select_sub_1_v4xi32:
+; X86: # %bb.0:
+; X86-NEXT: pcmpeqd %xmm1, %xmm1
+; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
+; X86-NEXT: je .LBB15_2
+; X86-NEXT: # %bb.1:
+; X86-NEXT: paddd %xmm0, %xmm1
+; X86-NEXT: .LBB15_2:
+; X86-NEXT: pand %xmm1, %xmm0
+; X86-NEXT: retl
+;
+; X64-LABEL: and_select_sub_1_v4xi32:
+; X64: # %bb.0:
+; X64-NEXT: pcmpeqd %xmm1, %xmm1
+; X64-NEXT: testb $1, %dil
+; X64-NEXT: je .LBB15_2
+; X64-NEXT: # %bb.1:
+; X64-NEXT: paddd %xmm0, %xmm1
+; X64-NEXT: .LBB15_2:
+; X64-NEXT: pand %xmm1, %xmm0
+; X64-NEXT: retq
+ %sub = add <4 x i32> %a1, <i32 -1, i32 -1, i32 -1, i32 -1>
+ %bls = select i1 %a0, <4 x i32> %sub, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
+ %ret = and <4 x i32> %a1, %bls
+ ret <4 x i32> %ret
+}
+
+; Negative test
+define i32 @and_select_no_sub_1(i1 %a0, i32 inreg %a1) nounwind {
+; X86-LABEL: and_select_no_sub_1:
+; X86: # %bb.0:
+; X86-NEXT: pushl %esi
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: andb $1, %cl
+; X86-NEXT: leal -2(%eax), %edx
+; X86-NEXT: xorl %esi, %esi
+; X86-NEXT: cmpb $1, %cl
+; X86-NEXT: sbbl %esi, %esi
+; X86-NEXT: orl %edx, %esi
+; X86-NEXT: andl %esi, %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+;
+; X64-LABEL: and_select_no_sub_1:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $esi killed $esi def $rsi
+; X64-NEXT: andb $1, %dil
+; X64-NEXT: leal -2(%rsi), %ecx
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: cmpb $1, %dil
+; X64-NEXT: sbbl %eax, %eax
+; X64-NEXT: orl %ecx, %eax
+; X64-NEXT: andl %esi, %eax
+; X64-NEXT: retq
+ %sub = add i32 %a1, -2
+ %bls = select i1 %a0, i32 %sub, i32 -1
+ %ret = and i32 %a1, %bls
+ ret i32 %ret
+}
+
+; Negative test
+define i32 @and_select_sub_1_wrong_const(i1 %a0, i32 inreg %a1) nounwind {
+; X86-LABEL: and_select_sub_1_wrong_const:
+; X86: # %bb.0:
+; X86-NEXT: leal -1(%eax), %ecx
+; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
+; X86-NEXT: movl $1, %edx
+; X86-NEXT: cmovnel %ecx, %edx
+; X86-NEXT: andl %edx, %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: and_select_sub_1_wrong_const:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $esi killed $esi def $rsi
+; X64-NEXT: leal -1(%rsi), %ecx
+; X64-NEXT: testb $1, %dil
+; X64-NEXT: movl $1, %eax
+; X64-NEXT: cmovnel %ecx, %eax
+; X64-NEXT: andl %esi, %eax
+; X64-NEXT: retq
+ %sub = add i32 %a1, -1
+ %bls = select i1 %a0, i32 %sub, i32 1
+ %ret = and i32 %a1, %bls
+ ret i32 %ret
+}
+
+; Negative test
+define i32 @and_select_sub_1_different_op(i1 %a0, i32 inreg %a1, i32 inreg %a2) nounwind {
+; X86-LABEL: and_select_sub_1_different_op:
+; X86: # %bb.0:
+; X86-NEXT: pushl %esi
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: andb $1, %cl
+; X86-NEXT: decl %edx
+; X86-NEXT: xorl %esi, %esi
+; X86-NEXT: cmpb $1, %cl
+; X86-NEXT: sbbl %esi, %esi
+; X86-NEXT: orl %edx, %esi
+; X86-NEXT: andl %esi, %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+;
+; X64-LABEL: and_select_sub_1_different_op:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $edx killed $edx def $rdx
+; X64-NEXT: andb $1, %dil
+; X64-NEXT: leal -1(%rdx), %ecx
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: cmpb $1, %dil
+; X64-NEXT: sbbl %eax, %eax
+; X64-NEXT: orl %ecx, %eax
+; X64-NEXT: andl %esi, %eax
+; X64-NEXT: retq
+ %sub = add i32 %a2, -1
+ %bls = select i1 %a0, i32 %sub, i32 -1
+ %ret = and i32 %a1, %bls
+ ret i32 %ret
+}
+
+define i32 @xor_select_sub_1_to_blsmsk1(i1 %a0, i32 inreg %a1) nounwind {
+; X86-LABEL: xor_select_sub_1_to_blsmsk1:
+; X86: # %bb.0:
+; X86-NEXT: leal -1(%eax), %ecx
+; X86-NEXT: xorl %edx, %edx
+; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
+; X86-NEXT: cmovnel %ecx, %edx
+; X86-NEXT: xorl %edx, %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: xor_select_sub_1_to_blsmsk1:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $esi killed $esi def $rsi
+; X64-NEXT: leal -1(%rsi), %ecx
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: testb $1, %dil
+; X64-NEXT: cmovnel %ecx, %eax
+; X64-NEXT: xorl %esi, %eax
+; X64-NEXT: retq
+ %sub = add i32 %a1, -1
+ %bls = select i1 %a0, i32 %sub, i32 0
+ %ret = xor i32 %a1, %bls
+ ret i32 %ret
+}
+
+define i32 @xor_select_sub_1_to_blsmsk2(i1 %a0, i32 inreg %a1) nounwind {
+; X86-LABEL: xor_select_sub_1_to_blsmsk2:
+; X86: # %bb.0:
+; X86-NEXT: leal -1(%eax), %ecx
+; X86-NEXT: xorl %edx, %edx
+; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
+; X86-NEXT: cmovnel %ecx, %edx
+; X86-NEXT: xorl %edx, %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: xor_select_sub_1_to_blsmsk2:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $esi killed $esi def $rsi
+; X64-NEXT: leal -1(%rsi), %ecx
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: testb $1, %dil
+; X64-NEXT: cmovnel %ecx, %eax
+; X64-NEXT: xorl %esi, %eax
+; X64-NEXT: retq
+ %sub = add i32 %a1, -1
+ %bls = select i1 %a0, i32 %sub, i32 0
+ %ret = xor i32 %bls, %a1
+ ret i32 %ret
+}
+
+define i32 @xor_select_sub_1_to_blsmsk3(i1 %a0, i32 inreg %a1) nounwind {
+; X86-LABEL: xor_select_sub_1_to_blsmsk3:
+; X86: # %bb.0:
+; X86-NEXT: leal -1(%eax), %ecx
+; X86-NEXT: xorl %edx, %edx
+; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
+; X86-NEXT: cmovel %ecx, %edx
+; X86-NEXT: xorl %edx, %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: xor_select_sub_1_to_blsmsk3:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $esi killed $esi def $rsi
+; X64-NEXT: leal -1(%rsi), %ecx
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: testb $1, %dil
+; X64-NEXT: cmovel %ecx, %eax
+; X64-NEXT: xorl %esi, %eax
+; X64-NEXT: retq
+ %sub = add i32 %a1, -1
+ %bls = select i1 %a0, i32 0, i32 %sub
+ %ret = xor i32 %a1, %bls
+ ret i32 %ret
+}
+
+define i32 @xor_select_sub_1_to_blsmsk4(i1 %a0, i32 inreg %a1) nounwind {
+; X86-LABEL: xor_select_sub_1_to_blsmsk4:
+; X86: # %bb.0:
+; X86-NEXT: leal -1(%eax), %ecx
+; X86-NEXT: xorl %edx, %edx
+; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
+; X86-NEXT: cmovnel %ecx, %edx
+; X86-NEXT: xorl %edx, %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: xor_select_sub_1_to_blsmsk4:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $esi killed $esi def $rsi
+; X64-NEXT: leal -1(%rsi), %ecx
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: testb $1, %dil
+; X64-NEXT: cmovnel %ecx, %eax
+; X64-NEXT: xorl %esi, %eax
+; X64-NEXT: retq
+ %sub = sub i32 %a1, 1
+ %bls = select i1 %a0, i32 %sub, i32 0
+ %ret = xor i32 %a1, %bls
+ ret i32 %ret
+}
+
+define i64 @xor_select_sub_1_to_blsmsk_i64(i1 %a0, i64 %a1) nounwind {
+; X86-LABEL: xor_select_sub_1_to_blsmsk_i64:
+; X86: # %bb.0:
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: addl $-1, %eax
+; X86-NEXT: movl %esi, %edx
+; X86-NEXT: adcl $-1, %edx
+; X86-NEXT: xorl %edi, %edi
+; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
+; X86-NEXT: cmovel %edi, %eax
+; X86-NEXT: cmovel %edi, %edx
+; X86-NEXT: xorl %ecx, %eax
+; X86-NEXT: xorl %esi, %edx
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: retl
+;
+; X64-LABEL: xor_select_sub_1_to_blsmsk_i64:
+; X64: # %bb.0:
+; X64-NEXT: leaq -1(%rsi), %rcx
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: testb $1, %dil
+; X64-NEXT: cmovneq %rcx, %rax
+; X64-NEXT: xorq %rsi, %rax
+; X64-NEXT: retq
+ %sub = add i64 %a1, -1
+ %bls = select i1 %a0, i64 %sub, i64 0
+ %ret = xor i64 %a1, %bls
+ ret i64 %ret
+}
+
+; Negative test
+define i16 @xor_select_sub_1_i16(i1 %a0, i16 %a1) nounwind {
+; X86-LABEL: xor_select_sub_1_i16:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: leal -1(%ecx), %edx
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
+; X86-NEXT: cmovnel %edx, %eax
+; X86-NEXT: xorl %ecx, %eax
+; X86-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-NEXT: retl
+;
+; X64-LABEL: xor_select_sub_1_i16:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $esi killed $esi def $rsi
+; X64-NEXT: leal -1(%rsi), %ecx
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: testb $1, %dil
+; X64-NEXT: cmovnel %ecx, %eax
+; X64-NEXT: xorl %esi, %eax
+; X64-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-NEXT: retq
+ %sub = add i16 %a1, -1
+ %bls = select i1 %a0, i16 %sub, i16 0
+ %ret = xor i16 %a1, %bls
+ ret i16 %ret
+}
+
+; Negative test
+define <4 x i32> @xor_select_sub_1_v4xi32(i1 %a0, <4 x i32> %a1) nounwind {
+; X86-LABEL: xor_select_sub_1_v4xi32:
+; X86: # %bb.0:
+; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
+; X86-NEXT: jne .LBB25_1
+; X86-NEXT: # %bb.2:
+; X86-NEXT: xorps %xmm1, %xmm1
+; X86-NEXT: xorps %xmm1, %xmm0
+; X86-NEXT: retl
+; X86-NEXT: .LBB25_1:
+; X86-NEXT: pcmpeqd %xmm1, %xmm1
+; X86-NEXT: paddd %xmm0, %xmm1
+; X86-NEXT: pxor %xmm1, %xmm0
+; X86-NEXT: retl
+;
+; X64-LABEL: xor_select_sub_1_v4xi32:
+; X64: # %bb.0:
+; X64-NEXT: testb $1, %dil
+; X64-NEXT: jne .LBB25_1
+; X64-NEXT: # %bb.2:
+; X64-NEXT: xorps %xmm1, %xmm1
+; X64-NEXT: xorps %xmm1, %xmm0
+; X64-NEXT: retq
+; X64-NEXT: .LBB25_1:
+; X64-NEXT: pcmpeqd %xmm1, %xmm1
+; X64-NEXT: paddd %xmm0, %xmm1
+; X64-NEXT: pxor %xmm1, %xmm0
+; X64-NEXT: retq
+ %sub = add <4 x i32> %a1, <i32 -1, i32 -1, i32 -1, i32 -1>
+ %bls = select i1 %a0, <4 x i32> %sub, <4 x i32> zeroinitializer
+ %ret = xor <4 x i32> %a1, %bls
+ ret <4 x i32> %ret
+}
+
+; Negative test
+define i32 @xor_select_no_sub_1(i1 %a0, i32 inreg %a1) nounwind {
+; X86-LABEL: xor_select_no_sub_1:
+; X86: # %bb.0:
+; X86-NEXT: leal 1(%eax), %ecx
+; X86-NEXT: xorl %edx, %edx
+; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
+; X86-NEXT: cmovnel %ecx, %edx
+; X86-NEXT: xorl %edx, %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: xor_select_no_sub_1:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $esi killed $esi def $rsi
+; X64-NEXT: leal 1(%rsi), %ecx
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: testb $1, %dil
+; X64-NEXT: cmovnel %ecx, %eax
+; X64-NEXT: xorl %esi, %eax
+; X64-NEXT: retq
+ %sub = add i32 %a1, 1
+ %bls = select i1 %a0, i32 %sub, i32 0
+ %ret = xor i32 %a1, %bls
+ ret i32 %ret
+}
+
+; Negative test
+define i32 @xor_select_sub_1_wrong_const(i1 %a0, i32 inreg %a1) nounwind {
+; X86-LABEL: xor_select_sub_1_wrong_const:
+; X86: # %bb.0:
+; X86-NEXT: pushl %esi
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: andb $1, %cl
+; X86-NEXT: leal -1(%eax), %edx
+; X86-NEXT: xorl %esi, %esi
+; X86-NEXT: cmpb $1, %cl
+; X86-NEXT: sbbl %esi, %esi
+; X86-NEXT: orl %edx, %esi
+; X86-NEXT: xorl %esi, %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+;
+; X64-LABEL: xor_select_sub_1_wrong_const:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $esi killed $esi def $rsi
+; X64-NEXT: andb $1, %dil
+; X64-NEXT: leal -1(%rsi), %ecx
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: cmpb $1, %dil
+; X64-NEXT: sbbl %eax, %eax
+; X64-NEXT: orl %ecx, %eax
+; X64-NEXT: xorl %esi, %eax
+; X64-NEXT: retq
+ %sub = add i32 %a1, -1
+ %bls = select i1 %a0, i32 %sub, i32 -1
+ %ret = xor i32 %a1, %bls
+ ret i32 %ret
+}
+
+; Negative test
+define i32 @xor_select_sub_1_different_op(i1 %a0, i32 inreg %a1, i32 inreg %a2) nounwind {
+; X86-LABEL: xor_select_sub_1_different_op:
+; X86: # %bb.0:
+; X86-NEXT: leal -1(%edx), %ecx
+; X86-NEXT: xorl %edx, %edx
+; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
+; X86-NEXT: cmovnel %ecx, %edx
+; X86-NEXT: xorl %edx, %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: xor_select_sub_1_different_op:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $edx killed $edx def $rdx
+; X64-NEXT: leal -1(%rdx), %ecx
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: testb $1, %dil
+; X64-NEXT: cmovnel %ecx, %eax
+; X64-NEXT: xorl %esi, %eax
+; X64-NEXT: retq
+ %sub = add i32 %a2, -1
+ %bls = select i1 %a0, i32 %sub, i32 0
+ %ret = xor i32 %a1, %bls
+ ret i32 %ret
+}
From f06d6446f666f8d1f1125caebe09d8bf86c35880 Mon Sep 17 00:00:00 2001
From: Marius Kamp <msk at posteo.org>
Date: Sun, 30 Mar 2025 08:58:00 +0200
Subject: [PATCH 2/2] [X86] Distribute Certain Bitwise Operations over SELECT
InstCombine canonicalizes `(select P (and X (- X)) X)` to
`(and (select P (- X) umax) X)`, where `umax` is the all-ones value,
i.e., the identity constant of AND. This is counterproductive for the
X86 backend when BMI is available because we can encode
`(and X (- X))` using the `BLSI` instruction. A similar situation
arises for `(select P (and X (sub X 1)) X)` (prevents use of the
`BLSR` instruction) and `(select P (xor X (sub X 1)) X)` (prevents use
of the `BLSMSK` instruction).

Trigger the inverse transformation in the X86 backend if BMI is
available and one of the mentioned BMI instructions applies. This is
done by adjusting the `shouldFoldSelectWithIdentityConstant()`
implementation for the X86 backend. In this way, we get
`(select P (and X (- X)) X)` again, which enables the use of `BLSI`
(similarly for the other cases described above).

Alive proofs: https://alive2.llvm.org/ce/z/MT_pKi

Fixes #131587, fixes #133848.
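
As a concrete sketch of the BLSI case (this mirrors the
`and_select_neg_to_blsi1` test from the first patch; the function name
is illustrative):

  ; Before this patch, llc -mattr=+bmi lowered the select arm with a
  ; generic cmp/sbb/or sequence. With the patch, the AND is distributed
  ; back over the select, yielding blsil + cmov.
  define i32 @blsi_if(i1 %p, i32 %x) {
    %neg = sub i32 0, %x
    %sel = select i1 %p, i32 %neg, i32 -1   ; -1 is the identity of AND
    %ret = and i32 %x, %sel
    ret i32 %ret
  }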
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 19 +-
llvm/test/CodeGen/X86/bmi-select-distrib.ll | 299 ++++++--------------
2 files changed, 99 insertions(+), 219 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 6b71f49165c60..e7dcf4a91e8fc 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -28,6 +28,7 @@
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/VectorUtils.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -35552,8 +35553,24 @@ bool X86TargetLowering::isNarrowingProfitable(SDNode *N, EVT SrcVT,
bool X86TargetLowering::shouldFoldSelectWithIdentityConstant(
unsigned BinOpcode, EVT VT, unsigned SelectOpcode, SDValue X,
SDValue Y) const {
- if (SelectOpcode != ISD::VSELECT)
+ if (SelectOpcode == ISD::SELECT) {
+ if (VT.isVector())
+ return false;
+ if (!Subtarget.hasBMI() || (VT != MVT::i32 && VT != MVT::i64))
+ return false;
+ using namespace llvm::SDPatternMatch;
+ // BLSI
+ if (BinOpcode == ISD::AND && sd_match(Y, m_Neg(m_Specific(X))))
+ return true;
+ // BLSR
+ if (BinOpcode == ISD::AND && sd_match(Y, m_Add(m_Specific(X), m_AllOnes())))
+ return true;
+ // BLSMSK
+ if (BinOpcode == ISD::XOR && sd_match(Y, m_Add(m_Specific(X), m_AllOnes())))
+ return true;
+
return false;
+ }
// TODO: This is too general. There are cases where pre-AVX512 codegen would
// benefit. The transform may also be profitable for scalar code.
if (!Subtarget.hasAVX512())
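
For reference, IR sketches of the other two patterns the hook now
accepts (these mirror the `blsr`/`blsmsk` tests updated below; function
names are illustrative):

  ; With -mattr=+bmi, each now lowers to the BMI instruction plus a
  ; cmov instead of the generic sbb/or materialization.
  define i32 @blsr_if(i1 %p, i32 %x) {
    %dec = add i32 %x, -1
    %sel = select i1 %p, i32 %dec, i32 -1   ; -1 is the identity of AND
    %ret = and i32 %x, %sel                 ; -> select(p, blsr(x), x)
    ret i32 %ret
  }

  define i32 @blsmsk_if(i1 %p, i32 %x) {
    %dec = add i32 %x, -1
    %sel = select i1 %p, i32 %dec, i32 0    ; 0 is the identity of XOR
    %ret = xor i32 %x, %sel                 ; -> select(p, blsmsk(x), x)
    ret i32 %ret
  }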
diff --git a/llvm/test/CodeGen/X86/bmi-select-distrib.ll b/llvm/test/CodeGen/X86/bmi-select-distrib.ll
index 1147561e22d06..466f877f57600 100644
--- a/llvm/test/CodeGen/X86/bmi-select-distrib.ll
+++ b/llvm/test/CodeGen/X86/bmi-select-distrib.ll
@@ -5,29 +5,16 @@
define i32 @and_select_neg_to_blsi1(i1 %a0, i32 inreg %a1) nounwind {
; X86-LABEL: and_select_neg_to_blsi1:
; X86: # %bb.0:
-; X86-NEXT: pushl %esi
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: andb $1, %cl
-; X86-NEXT: movl %eax, %edx
-; X86-NEXT: negl %edx
-; X86-NEXT: xorl %esi, %esi
-; X86-NEXT: cmpb $1, %cl
-; X86-NEXT: sbbl %esi, %esi
-; X86-NEXT: orl %edx, %esi
-; X86-NEXT: andl %esi, %eax
-; X86-NEXT: popl %esi
+; X86-NEXT: blsil %eax, %ecx
+; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
+; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: and_select_neg_to_blsi1:
; X64: # %bb.0:
-; X64-NEXT: andb $1, %dil
-; X64-NEXT: movl %esi, %ecx
-; X64-NEXT: negl %ecx
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpb $1, %dil
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: orl %ecx, %eax
-; X64-NEXT: andl %esi, %eax
+; X64-NEXT: blsil %esi, %eax
+; X64-NEXT: testb $1, %dil
+; X64-NEXT: cmovel %esi, %eax
; X64-NEXT: retq
%sub = sub i32 0, %a1
%bls = select i1 %a0, i32 %sub, i32 -1
@@ -38,29 +25,16 @@ define i32 @and_select_neg_to_blsi1(i1 %a0, i32 inreg %a1) nounwind {
define i32 @and_select_neg_to_blsi2(i1 %a0, i32 inreg %a1) nounwind {
; X86-LABEL: and_select_neg_to_blsi2:
; X86: # %bb.0:
-; X86-NEXT: pushl %esi
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: andb $1, %cl
-; X86-NEXT: movl %eax, %edx
-; X86-NEXT: negl %edx
-; X86-NEXT: xorl %esi, %esi
-; X86-NEXT: cmpb $1, %cl
-; X86-NEXT: sbbl %esi, %esi
-; X86-NEXT: orl %edx, %esi
-; X86-NEXT: andl %esi, %eax
-; X86-NEXT: popl %esi
+; X86-NEXT: blsil %eax, %ecx
+; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
+; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: and_select_neg_to_blsi2:
; X64: # %bb.0:
-; X64-NEXT: andb $1, %dil
-; X64-NEXT: movl %esi, %ecx
-; X64-NEXT: negl %ecx
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpb $1, %dil
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: orl %ecx, %eax
-; X64-NEXT: andl %esi, %eax
+; X64-NEXT: blsil %esi, %eax
+; X64-NEXT: testb $1, %dil
+; X64-NEXT: cmovel %esi, %eax
; X64-NEXT: retq
%sub = sub i32 0, %a1
%bls = select i1 %a0, i32 %sub, i32 -1
@@ -71,29 +45,16 @@ define i32 @and_select_neg_to_blsi2(i1 %a0, i32 inreg %a1) nounwind {
define i32 @and_select_neg_to_blsi3(i1 %a0, i32 inreg %a1) nounwind {
; X86-LABEL: and_select_neg_to_blsi3:
; X86: # %bb.0:
-; X86-NEXT: pushl %esi
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: andb $1, %cl
-; X86-NEXT: movl %eax, %edx
-; X86-NEXT: negl %edx
-; X86-NEXT: xorl %esi, %esi
-; X86-NEXT: negb %cl
-; X86-NEXT: sbbl %esi, %esi
-; X86-NEXT: orl %edx, %esi
-; X86-NEXT: andl %esi, %eax
-; X86-NEXT: popl %esi
+; X86-NEXT: blsil %eax, %ecx
+; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
+; X86-NEXT: cmovel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: and_select_neg_to_blsi3:
; X64: # %bb.0:
-; X64-NEXT: andb $1, %dil
-; X64-NEXT: movl %esi, %ecx
-; X64-NEXT: negl %ecx
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: negb %dil
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: orl %ecx, %eax
-; X64-NEXT: andl %esi, %eax
+; X64-NEXT: blsil %esi, %eax
+; X64-NEXT: testb $1, %dil
+; X64-NEXT: cmovnel %esi, %eax
; X64-NEXT: retq
%sub = sub i32 0, %a1
%bls = select i1 %a0, i32 -1, i32 %sub
@@ -104,39 +65,26 @@ define i32 @and_select_neg_to_blsi3(i1 %a0, i32 inreg %a1) nounwind {
define i64 @and_select_neg_to_blsi_i64(i1 %a0, i64 %a1) nounwind {
; X86-LABEL: and_select_neg_to_blsi_i64:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebx
-; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: andb $1, %bl
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: movl %ecx, %edi
-; X86-NEXT: negl %edi
-; X86-NEXT: movl $0, %edx
+; X86-NEXT: xorl %edx, %edx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: negl %eax
; X86-NEXT: sbbl %esi, %edx
-; X86-NEXT: cmpb $1, %bl
-; X86-NEXT: sbbl %eax, %eax
-; X86-NEXT: orl %eax, %edx
-; X86-NEXT: orl %edi, %eax
; X86-NEXT: andl %esi, %edx
; X86-NEXT: andl %ecx, %eax
+; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
+; X86-NEXT: cmovel %esi, %edx
+; X86-NEXT: cmovel %ecx, %eax
; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
-; X86-NEXT: popl %ebx
; X86-NEXT: retl
;
; X64-LABEL: and_select_neg_to_blsi_i64:
; X64: # %bb.0:
-; X64-NEXT: andb $1, %dil
-; X64-NEXT: movq %rsi, %rcx
-; X64-NEXT: negq %rcx
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpb $1, %dil
-; X64-NEXT: sbbq %rax, %rax
-; X64-NEXT: orq %rcx, %rax
-; X64-NEXT: andq %rsi, %rax
+; X64-NEXT: blsiq %rsi, %rax
+; X64-NEXT: testb $1, %dil
+; X64-NEXT: cmoveq %rsi, %rax
; X64-NEXT: retq
%sub = sub i64 0, %a1
%bls = select i1 %a0, i64 %sub, i64 -1
@@ -306,28 +254,16 @@ define i32 @and_select_neg_different_op(i1 %a0, i32 inreg %a1, i32 inreg %a2) no
define i32 @and_select_sub_1_to_blsr1(i1 %a0, i32 inreg %a1) nounwind {
; X86-LABEL: and_select_sub_1_to_blsr1:
; X86: # %bb.0:
-; X86-NEXT: pushl %esi
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: andb $1, %cl
-; X86-NEXT: leal -1(%eax), %edx
-; X86-NEXT: xorl %esi, %esi
-; X86-NEXT: cmpb $1, %cl
-; X86-NEXT: sbbl %esi, %esi
-; X86-NEXT: orl %edx, %esi
-; X86-NEXT: andl %esi, %eax
-; X86-NEXT: popl %esi
+; X86-NEXT: blsrl %eax, %ecx
+; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
+; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: and_select_sub_1_to_blsr1:
; X64: # %bb.0:
-; X64-NEXT: # kill: def $esi killed $esi def $rsi
-; X64-NEXT: andb $1, %dil
-; X64-NEXT: leal -1(%rsi), %ecx
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpb $1, %dil
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: orl %ecx, %eax
-; X64-NEXT: andl %esi, %eax
+; X64-NEXT: blsrl %esi, %eax
+; X64-NEXT: testb $1, %dil
+; X64-NEXT: cmovel %esi, %eax
; X64-NEXT: retq
%sub = add i32 %a1, -1
%bls = select i1 %a0, i32 %sub, i32 -1
@@ -338,28 +274,16 @@ define i32 @and_select_sub_1_to_blsr1(i1 %a0, i32 inreg %a1) nounwind {
define i32 @and_select_sub_1_to_blsr2(i1 %a0, i32 inreg %a1) nounwind {
; X86-LABEL: and_select_sub_1_to_blsr2:
; X86: # %bb.0:
-; X86-NEXT: pushl %esi
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: andb $1, %cl
-; X86-NEXT: leal -1(%eax), %edx
-; X86-NEXT: xorl %esi, %esi
-; X86-NEXT: cmpb $1, %cl
-; X86-NEXT: sbbl %esi, %esi
-; X86-NEXT: orl %edx, %esi
-; X86-NEXT: andl %esi, %eax
-; X86-NEXT: popl %esi
+; X86-NEXT: blsrl %eax, %ecx
+; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
+; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: and_select_sub_1_to_blsr2:
; X64: # %bb.0:
-; X64-NEXT: # kill: def $esi killed $esi def $rsi
-; X64-NEXT: andb $1, %dil
-; X64-NEXT: leal -1(%rsi), %ecx
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpb $1, %dil
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: orl %ecx, %eax
-; X64-NEXT: andl %esi, %eax
+; X64-NEXT: blsrl %esi, %eax
+; X64-NEXT: testb $1, %dil
+; X64-NEXT: cmovel %esi, %eax
; X64-NEXT: retq
%sub = add i32 %a1, -1
%bls = select i1 %a0, i32 %sub, i32 -1
@@ -370,28 +294,16 @@ define i32 @and_select_sub_1_to_blsr2(i1 %a0, i32 inreg %a1) nounwind {
define i32 @and_select_sub_1_to_blsr3(i1 %a0, i32 inreg %a1) nounwind {
; X86-LABEL: and_select_sub_1_to_blsr3:
; X86: # %bb.0:
-; X86-NEXT: pushl %esi
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: andb $1, %cl
-; X86-NEXT: leal -1(%eax), %edx
-; X86-NEXT: xorl %esi, %esi
-; X86-NEXT: negb %cl
-; X86-NEXT: sbbl %esi, %esi
-; X86-NEXT: orl %edx, %esi
-; X86-NEXT: andl %esi, %eax
-; X86-NEXT: popl %esi
+; X86-NEXT: blsrl %eax, %ecx
+; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
+; X86-NEXT: cmovel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: and_select_sub_1_to_blsr3:
; X64: # %bb.0:
-; X64-NEXT: # kill: def $esi killed $esi def $rsi
-; X64-NEXT: andb $1, %dil
-; X64-NEXT: leal -1(%rsi), %ecx
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: negb %dil
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: orl %ecx, %eax
-; X64-NEXT: andl %esi, %eax
+; X64-NEXT: blsrl %esi, %eax
+; X64-NEXT: testb $1, %dil
+; X64-NEXT: cmovnel %esi, %eax
; X64-NEXT: retq
%sub = add i32 %a1, -1
%bls = select i1 %a0, i32 -1, i32 %sub
@@ -402,28 +314,16 @@ define i32 @and_select_sub_1_to_blsr3(i1 %a0, i32 inreg %a1) nounwind {
define i32 @and_select_sub_1_to_blsr4(i1 %a0, i32 inreg %a1) nounwind {
; X86-LABEL: and_select_sub_1_to_blsr4:
; X86: # %bb.0:
-; X86-NEXT: pushl %esi
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: andb $1, %cl
-; X86-NEXT: leal -1(%eax), %edx
-; X86-NEXT: xorl %esi, %esi
-; X86-NEXT: cmpb $1, %cl
-; X86-NEXT: sbbl %esi, %esi
-; X86-NEXT: orl %edx, %esi
-; X86-NEXT: andl %esi, %eax
-; X86-NEXT: popl %esi
+; X86-NEXT: blsrl %eax, %ecx
+; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
+; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: and_select_sub_1_to_blsr4:
; X64: # %bb.0:
-; X64-NEXT: # kill: def $esi killed $esi def $rsi
-; X64-NEXT: andb $1, %dil
-; X64-NEXT: leal -1(%rsi), %ecx
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpb $1, %dil
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: orl %ecx, %eax
-; X64-NEXT: andl %esi, %eax
+; X64-NEXT: blsrl %esi, %eax
+; X64-NEXT: testb $1, %dil
+; X64-NEXT: cmovel %esi, %eax
; X64-NEXT: retq
%sub = sub i32 %a1, 1
%bls = select i1 %a0, i32 %sub, i32 -1
@@ -434,38 +334,26 @@ define i32 @and_select_sub_1_to_blsr4(i1 %a0, i32 inreg %a1) nounwind {
define i64 @and_select_sub_1_to_blsr_i64(i1 %a0, i64 %a1) nounwind {
; X86-LABEL: and_select_sub_1_to_blsr_i64:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebx
-; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: andb $1, %bl
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl %ecx, %edi
-; X86-NEXT: addl $-1, %edi
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: addl $-1, %eax
; X86-NEXT: movl %esi, %edx
; X86-NEXT: adcl $-1, %edx
-; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: cmpb $1, %bl
-; X86-NEXT: sbbl %eax, %eax
-; X86-NEXT: orl %eax, %edx
-; X86-NEXT: orl %edi, %eax
; X86-NEXT: andl %esi, %edx
; X86-NEXT: andl %ecx, %eax
+; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
+; X86-NEXT: cmovel %ecx, %eax
+; X86-NEXT: cmovel %esi, %edx
; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
-; X86-NEXT: popl %ebx
; X86-NEXT: retl
;
; X64-LABEL: and_select_sub_1_to_blsr_i64:
; X64: # %bb.0:
-; X64-NEXT: andb $1, %dil
-; X64-NEXT: leaq -1(%rsi), %rcx
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpb $1, %dil
-; X64-NEXT: sbbq %rax, %rax
-; X64-NEXT: orq %rcx, %rax
-; X64-NEXT: andq %rsi, %rax
+; X64-NEXT: blsrq %rsi, %rax
+; X64-NEXT: testb $1, %dil
+; X64-NEXT: cmoveq %rsi, %rax
; X64-NEXT: retq
%sub = add i64 %a1, -1
%bls = select i1 %a0, i64 %sub, i64 -1
@@ -633,21 +521,16 @@ define i32 @and_select_sub_1_different_op(i1 %a0, i32 inreg %a1, i32 inreg %a2)
define i32 @xor_select_sub_1_to_blsmsk1(i1 %a0, i32 inreg %a1) nounwind {
; X86-LABEL: xor_select_sub_1_to_blsmsk1:
; X86: # %bb.0:
-; X86-NEXT: leal -1(%eax), %ecx
-; X86-NEXT: xorl %edx, %edx
+; X86-NEXT: blsmskl %eax, %ecx
; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
-; X86-NEXT: cmovnel %ecx, %edx
-; X86-NEXT: xorl %edx, %eax
+; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: xor_select_sub_1_to_blsmsk1:
; X64: # %bb.0:
-; X64-NEXT: # kill: def $esi killed $esi def $rsi
-; X64-NEXT: leal -1(%rsi), %ecx
-; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: blsmskl %esi, %eax
; X64-NEXT: testb $1, %dil
-; X64-NEXT: cmovnel %ecx, %eax
-; X64-NEXT: xorl %esi, %eax
+; X64-NEXT: cmovel %esi, %eax
; X64-NEXT: retq
%sub = add i32 %a1, -1
%bls = select i1 %a0, i32 %sub, i32 0
@@ -658,21 +541,16 @@ define i32 @xor_select_sub_1_to_blsmsk1(i1 %a0, i32 inreg %a1) nounwind {
define i32 @xor_select_sub_1_to_blsmsk2(i1 %a0, i32 inreg %a1) nounwind {
; X86-LABEL: xor_select_sub_1_to_blsmsk2:
; X86: # %bb.0:
-; X86-NEXT: leal -1(%eax), %ecx
-; X86-NEXT: xorl %edx, %edx
+; X86-NEXT: blsmskl %eax, %ecx
; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
-; X86-NEXT: cmovnel %ecx, %edx
-; X86-NEXT: xorl %edx, %eax
+; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: xor_select_sub_1_to_blsmsk2:
; X64: # %bb.0:
-; X64-NEXT: # kill: def $esi killed $esi def $rsi
-; X64-NEXT: leal -1(%rsi), %ecx
-; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: blsmskl %esi, %eax
; X64-NEXT: testb $1, %dil
-; X64-NEXT: cmovnel %ecx, %eax
-; X64-NEXT: xorl %esi, %eax
+; X64-NEXT: cmovel %esi, %eax
; X64-NEXT: retq
%sub = add i32 %a1, -1
%bls = select i1 %a0, i32 %sub, i32 0
@@ -683,21 +561,16 @@ define i32 @xor_select_sub_1_to_blsmsk2(i1 %a0, i32 inreg %a1) nounwind {
define i32 @xor_select_sub_1_to_blsmsk3(i1 %a0, i32 inreg %a1) nounwind {
; X86-LABEL: xor_select_sub_1_to_blsmsk3:
; X86: # %bb.0:
-; X86-NEXT: leal -1(%eax), %ecx
-; X86-NEXT: xorl %edx, %edx
+; X86-NEXT: blsmskl %eax, %ecx
; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
-; X86-NEXT: cmovel %ecx, %edx
-; X86-NEXT: xorl %edx, %eax
+; X86-NEXT: cmovel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: xor_select_sub_1_to_blsmsk3:
; X64: # %bb.0:
-; X64-NEXT: # kill: def $esi killed $esi def $rsi
-; X64-NEXT: leal -1(%rsi), %ecx
-; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: blsmskl %esi, %eax
; X64-NEXT: testb $1, %dil
-; X64-NEXT: cmovel %ecx, %eax
-; X64-NEXT: xorl %esi, %eax
+; X64-NEXT: cmovnel %esi, %eax
; X64-NEXT: retq
%sub = add i32 %a1, -1
%bls = select i1 %a0, i32 0, i32 %sub
@@ -708,21 +581,16 @@ define i32 @xor_select_sub_1_to_blsmsk3(i1 %a0, i32 inreg %a1) nounwind {
define i32 @xor_select_sub_1_to_blsmsk4(i1 %a0, i32 inreg %a1) nounwind {
; X86-LABEL: xor_select_sub_1_to_blsmsk4:
; X86: # %bb.0:
-; X86-NEXT: leal -1(%eax), %ecx
-; X86-NEXT: xorl %edx, %edx
+; X86-NEXT: blsmskl %eax, %ecx
; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
-; X86-NEXT: cmovnel %ecx, %edx
-; X86-NEXT: xorl %edx, %eax
+; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: xor_select_sub_1_to_blsmsk4:
; X64: # %bb.0:
-; X64-NEXT: # kill: def $esi killed $esi def $rsi
-; X64-NEXT: leal -1(%rsi), %ecx
-; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: blsmskl %esi, %eax
; X64-NEXT: testb $1, %dil
-; X64-NEXT: cmovnel %ecx, %eax
-; X64-NEXT: xorl %esi, %eax
+; X64-NEXT: cmovel %esi, %eax
; X64-NEXT: retq
%sub = sub i32 %a1, 1
%bls = select i1 %a0, i32 %sub, i32 0
@@ -733,7 +601,6 @@ define i32 @xor_select_sub_1_to_blsmsk4(i1 %a0, i32 inreg %a1) nounwind {
define i64 @xor_select_sub_1_to_blsmsk_i64(i1 %a0, i64 %a1) nounwind {
; X86-LABEL: xor_select_sub_1_to_blsmsk_i64:
; X86: # %bb.0:
-; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
@@ -741,23 +608,19 @@ define i64 @xor_select_sub_1_to_blsmsk_i64(i1 %a0, i64 %a1) nounwind {
; X86-NEXT: addl $-1, %eax
; X86-NEXT: movl %esi, %edx
; X86-NEXT: adcl $-1, %edx
-; X86-NEXT: xorl %edi, %edi
-; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
-; X86-NEXT: cmovel %edi, %eax
-; X86-NEXT: cmovel %edi, %edx
-; X86-NEXT: xorl %ecx, %eax
; X86-NEXT: xorl %esi, %edx
+; X86-NEXT: xorl %ecx, %eax
+; X86-NEXT: testb $1, {{[0-9]+}}(%esp)
+; X86-NEXT: cmovel %ecx, %eax
+; X86-NEXT: cmovel %esi, %edx
; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
; X86-NEXT: retl
;
; X64-LABEL: xor_select_sub_1_to_blsmsk_i64:
; X64: # %bb.0:
-; X64-NEXT: leaq -1(%rsi), %rcx
-; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: blsmskq %rsi, %rax
; X64-NEXT: testb $1, %dil
-; X64-NEXT: cmovneq %rcx, %rax
-; X64-NEXT: xorq %rsi, %rax
+; X64-NEXT: cmoveq %rsi, %rax
; X64-NEXT: retq
%sub = add i64 %a1, -1
%bls = select i1 %a0, i64 %sub, i64 0