[llvm] [X86] Fold AND(Y, XOR(X, SUB(0, X))) to ANDN(Y, BLSMSK(X)) (PR #128348)
Marius Kamp via llvm-commits
llvm-commits at lists.llvm.org
Sat Feb 22 09:30:27 PST 2025
https://github.com/mskamp updated https://github.com/llvm/llvm-project/pull/128348
From c2b477dd676a249404387a5acdfa47d26072c372 Mon Sep 17 00:00:00 2001
From: Marius Kamp <msk at posteo.org>
Date: Sat, 16 Nov 2024 11:55:18 +0100
Subject: [PATCH 1/2] [X86] Add Tests for AND(Y, XOR(X, SUB(0, X))) -> ANDN(Y,
BLSMSK(X)); NFC
---
llvm/test/CodeGen/X86/andnot-blsmsk.ll | 278 +++++++++++++++++++++++++
1 file changed, 278 insertions(+)
create mode 100644 llvm/test/CodeGen/X86/andnot-blsmsk.ll
diff --git a/llvm/test/CodeGen/X86/andnot-blsmsk.ll b/llvm/test/CodeGen/X86/andnot-blsmsk.ll
new file mode 100644
index 0000000000000..9242eefae00e8
--- /dev/null
+++ b/llvm/test/CodeGen/X86/andnot-blsmsk.ll
@@ -0,0 +1,278 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=i686-- -mattr=-bmi,+sse2 | FileCheck %s --check-prefixes=X86,X86-NOBMI
+; RUN: llc < %s -mtriple=i686-- -mattr=+bmi,+sse2 | FileCheck %s --check-prefixes=X86,X86-BMI
+; RUN: llc < %s -mtriple=x86_64-- -mattr=-bmi | FileCheck %s --check-prefixes=X64,X64-NOBMI
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+bmi | FileCheck %s --check-prefixes=X64,X64-BMI
+
+declare void @use(i32)
+
+define i32 @fold_and_xor_neg_v1_32(i32 %x, i32 %y) nounwind {
+; X86-LABEL: fold_and_xor_neg_v1_32:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: negl %eax
+; X86-NEXT: xorl %ecx, %eax
+; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: fold_and_xor_neg_v1_32:
+; X64: # %bb.0:
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: negl %eax
+; X64-NEXT: xorl %edi, %eax
+; X64-NEXT: andl %esi, %eax
+; X64-NEXT: retq
+ %neg = sub i32 0, %x
+ %xor = xor i32 %x, %neg
+ %and = and i32 %xor, %y
+ ret i32 %and
+}
+
+define i32 @fold_and_xor_neg_v2_32(i32 %x, i32 %y) nounwind {
+; X86-LABEL: fold_and_xor_neg_v2_32:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: negl %eax
+; X86-NEXT: xorl %ecx, %eax
+; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: fold_and_xor_neg_v2_32:
+; X64: # %bb.0:
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: negl %eax
+; X64-NEXT: xorl %edi, %eax
+; X64-NEXT: andl %esi, %eax
+; X64-NEXT: retq
+ %neg = sub i32 0, %x
+ %xor = xor i32 %x, %neg
+ %and = and i32 %y, %xor
+ ret i32 %and
+}
+
+define i32 @fold_and_xor_neg_v3_32(i32 %x, i32 %y) nounwind {
+; X86-LABEL: fold_and_xor_neg_v3_32:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: negl %eax
+; X86-NEXT: xorl %ecx, %eax
+; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: fold_and_xor_neg_v3_32:
+; X64: # %bb.0:
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: negl %eax
+; X64-NEXT: xorl %edi, %eax
+; X64-NEXT: andl %esi, %eax
+; X64-NEXT: retq
+ %neg = sub i32 0, %x
+ %xor = xor i32 %neg, %x
+ %and = and i32 %xor, %y
+ ret i32 %and
+}
+
+define i32 @fold_and_xor_neg_v4_32(i32 %x, i32 %y) nounwind {
+; X86-LABEL: fold_and_xor_neg_v4_32:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: negl %eax
+; X86-NEXT: xorl %ecx, %eax
+; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: fold_and_xor_neg_v4_32:
+; X64: # %bb.0:
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: negl %eax
+; X64-NEXT: xorl %edi, %eax
+; X64-NEXT: andl %esi, %eax
+; X64-NEXT: retq
+ %neg = sub i32 0, %x
+ %xor = xor i32 %neg, %x
+ %and = and i32 %y, %xor
+ ret i32 %and
+}
+
+define i64 @fold_and_xor_neg_v1_64(i64 %x, i64 %y) nounwind {
+; X86-LABEL: fold_and_xor_neg_v1_64:
+; X86: # %bb.0:
+; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: xorl %edx, %edx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: negl %eax
+; X86-NEXT: sbbl %esi, %edx
+; X86-NEXT: xorl %esi, %edx
+; X86-NEXT: xorl %ecx, %eax
+; X86-NEXT: andl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+;
+; X64-LABEL: fold_and_xor_neg_v1_64:
+; X64: # %bb.0:
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: negq %rax
+; X64-NEXT: xorq %rdi, %rax
+; X64-NEXT: andq %rsi, %rax
+; X64-NEXT: retq
+ %neg = sub i64 0, %x
+ %xor = xor i64 %x, %neg
+ %and = and i64 %xor, %y
+ ret i64 %and
+}
+
+; Negative test
+define i16 @fold_and_xor_neg_v1_16_negative(i16 %x, i16 %y) nounwind {
+; X86-LABEL: fold_and_xor_neg_v1_16_negative:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: negl %eax
+; X86-NEXT: xorl %ecx, %eax
+; X86-NEXT: andw {{[0-9]+}}(%esp), %ax
+; X86-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-NEXT: retl
+;
+; X64-LABEL: fold_and_xor_neg_v1_16_negative:
+; X64: # %bb.0:
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: negl %eax
+; X64-NEXT: xorl %edi, %eax
+; X64-NEXT: andl %esi, %eax
+; X64-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-NEXT: retq
+ %neg = sub i16 0, %x
+ %xor = xor i16 %x, %neg
+ %and = and i16 %xor, %y
+ ret i16 %and
+}
+
+; Negative test
+define <4 x i32> @fold_and_xor_neg_v1_v4x32_negative(<4 x i32> %x, <4 x i32> %y) nounwind {
+; X86-LABEL: fold_and_xor_neg_v1_v4x32_negative:
+; X86: # %bb.0:
+; X86-NEXT: pxor %xmm2, %xmm2
+; X86-NEXT: psubd %xmm0, %xmm2
+; X86-NEXT: pxor %xmm2, %xmm0
+; X86-NEXT: pand %xmm1, %xmm0
+; X86-NEXT: retl
+;
+; X64-LABEL: fold_and_xor_neg_v1_v4x32_negative:
+; X64: # %bb.0:
+; X64-NEXT: pxor %xmm2, %xmm2
+; X64-NEXT: psubd %xmm0, %xmm2
+; X64-NEXT: pxor %xmm2, %xmm0
+; X64-NEXT: pand %xmm1, %xmm0
+; X64-NEXT: retq
+ %neg = sub <4 x i32> zeroinitializer, %x
+ %xor = xor <4 x i32> %x, %neg
+ %and = and <4 x i32> %xor, %y
+ ret <4 x i32> %and
+}
+
+; Negative test
+define i32 @fold_and_xor_neg_v1_32_two_uses_xor_negative(i32 %x, i32 %y) nounwind {
+; X86-LABEL: fold_and_xor_neg_v1_32_two_uses_xor_negative:
+; X86: # %bb.0:
+; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: negl %ecx
+; X86-NEXT: xorl %eax, %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: andl %ecx, %esi
+; X86-NEXT: pushl %ecx
+; X86-NEXT: calll use@PLT
+; X86-NEXT: addl $4, %esp
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+;
+; X64-LABEL: fold_and_xor_neg_v1_32_two_uses_xor_negative:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rbx
+; X64-NEXT: movl %esi, %ebx
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: negl %eax
+; X64-NEXT: xorl %eax, %edi
+; X64-NEXT: andl %edi, %ebx
+; X64-NEXT: callq use@PLT
+; X64-NEXT: movl %ebx, %eax
+; X64-NEXT: popq %rbx
+; X64-NEXT: retq
+ %neg = sub i32 0, %x
+ %xor = xor i32 %x, %neg
+ %and = and i32 %xor, %y
+ call void @use(i32 %xor)
+ ret i32 %and
+}
+
+; Negative test
+define i32 @fold_and_xor_neg_v1_32_two_uses_sub_negative(i32 %x, i32 %y) nounwind {
+; X86-LABEL: fold_and_xor_neg_v1_32_two_uses_sub_negative:
+; X86: # %bb.0:
+; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: negl %eax
+; X86-NEXT: xorl %eax, %esi
+; X86-NEXT: andl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: pushl %eax
+; X86-NEXT: calll use@PLT
+; X86-NEXT: addl $4, %esp
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+;
+; X64-LABEL: fold_and_xor_neg_v1_32_two_uses_sub_negative:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rbx
+; X64-NEXT: movl %edi, %ebx
+; X64-NEXT: negl %edi
+; X64-NEXT: xorl %edi, %ebx
+; X64-NEXT: andl %esi, %ebx
+; X64-NEXT: callq use@PLT
+; X64-NEXT: movl %ebx, %eax
+; X64-NEXT: popq %rbx
+; X64-NEXT: retq
+ %neg = sub i32 0, %x
+ %xor = xor i32 %x, %neg
+ %and = and i32 %xor, %y
+ call void @use(i32 %neg)
+ ret i32 %and
+}
+
+; Negative test
+define i32 @fold_and_xor_neg_v1_32_no_blsmsk_negative(i32 %x, i32 %y, i32 %z) nounwind {
+; X86-LABEL: fold_and_xor_neg_v1_32_no_blsmsk_negative:
+; X86: # %bb.0:
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: subl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: xorl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: fold_and_xor_neg_v1_32_no_blsmsk_negative:
+; X64: # %bb.0:
+; X64-NEXT: movl %edx, %eax
+; X64-NEXT: negl %eax
+; X64-NEXT: xorl %edi, %eax
+; X64-NEXT: andl %esi, %eax
+; X64-NEXT: retq
+ %neg = sub i32 0, %z
+ %xor = xor i32 %x, %neg
+ %and = and i32 %xor, %y
+ ret i32 %and
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; X64-BMI: {{.*}}
+; X64-NOBMI: {{.*}}
+; X86-BMI: {{.*}}
+; X86-NOBMI: {{.*}}
From a93469ee045696a938104681c9e50e1da843fa2a Mon Sep 17 00:00:00 2001
From: Marius Kamp <msk at posteo.org>
Date: Sat, 16 Nov 2024 12:13:30 +0100
Subject: [PATCH 2/2] [X86] Fold AND(Y, XOR(X, SUB(0, X))) to ANDN(Y,
BLSMSK(X))
XOR(X, SUB(0, X)) is the bitwise negation of what the BLSMSK instruction
computes (i.e., of x ^ (x - 1)). On its own, rewriting the XOR this way
is unlikely to be profitable, but when the XOR is an operand of an AND
operation, we can use an ANDN instruction to reduce the number of
emitted instructions by one.
Fixes #103501.
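For reference, a minimal standalone C++ sketch (illustrative only, not part
of the patch) that checks the identity the fold relies on: x ^ -x equals
~(x ^ (x - 1)), so AND(y, x ^ -x) can be rewritten as ANDN(y, BLSMSK(x)).
The sample values below are arbitrary.

// Checks x ^ -x == ~BLSMSK(x) and AND(y, x ^ -x) == ANDN(y, BLSMSK(x))
// for a handful of 32-bit values, including the edge cases 0 and -1.
#include <cassert>
#include <cstdint>
#include <initializer_list>

int main() {
  for (uint32_t X : {0u, 1u, 4u, 0x80000000u, 0xFFFFFFFFu}) {
    uint32_t Blsmsk = X ^ (X - 1);        // what BLSMSK computes
    uint32_t NotBlsmsk = ~Blsmsk;         // ANDN negates its first operand
    assert((X ^ (0u - X)) == NotBlsmsk);  // XOR(X, SUB(0, X)) == NOT(BLSMSK(X))
    uint32_t Y = 0xDEADBEEFu;             // arbitrary other AND operand
    assert(((X ^ (0u - X)) & Y) == (Y & NotBlsmsk));
  }
  return 0;
}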
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 30 ++++
llvm/test/CodeGen/X86/andnot-blsmsk.ll | 193 +++++++++++++++---------
2 files changed, 151 insertions(+), 72 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 1c9d43ce4c062..e27e30e36dd25 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -51045,6 +51045,33 @@ static SDValue combineBMILogicOp(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+/// Fold AND(Y, XOR(X, NEG(X))) -> ANDN(Y, BLSMSK(X)) if BMI is available.
+static SDValue combineAndXorSubWithBMI(SDNode *And, const SDLoc &DL,
+ SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ using namespace llvm::SDPatternMatch;
+
+ EVT VT = And->getValueType(0);
+ // Make sure this node is a candidate for BMI instructions.
+ if (!Subtarget.hasBMI() || !VT.isScalarInteger() ||
+ (VT != MVT::i32 && VT != MVT::i64))
+ return SDValue();
+
+ SDValue X;
+ SDValue OtherOp;
+ if (!sd_match(And, m_And(m_OneUse(m_Xor(m_Value(X),
+ m_OneUse(m_Neg(m_Deferred(X))))),
+ m_Value(OtherOp))))
+ return SDValue();
+
+ SDValue BLSMSK =
+ DAG.getNode(ISD::XOR, DL, VT, X,
+ DAG.getNode(ISD::SUB, DL, VT, X, DAG.getConstant(1, DL, VT)));
+ SDValue AndN =
+ DAG.getNode(ISD::AND, DL, VT, OtherOp, DAG.getNOT(DL, BLSMSK, VT));
+ return AndN;
+}
+
static SDValue combineX86SubCmpForFlags(SDNode *N, SDValue Flag,
SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
@@ -51453,6 +51480,9 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
if (SDValue R = combineBMILogicOp(N, DAG, Subtarget))
return R;
+ if (SDValue R = combineAndXorSubWithBMI(N, dl, DAG, Subtarget))
+ return R;
+
return SDValue();
}
diff --git a/llvm/test/CodeGen/X86/andnot-blsmsk.ll b/llvm/test/CodeGen/X86/andnot-blsmsk.ll
index 9242eefae00e8..74766821f6ce7 100644
--- a/llvm/test/CodeGen/X86/andnot-blsmsk.ll
+++ b/llvm/test/CodeGen/X86/andnot-blsmsk.ll
@@ -7,22 +7,34 @@
declare void @use(i32)
define i32 @fold_and_xor_neg_v1_32(i32 %x, i32 %y) nounwind {
-; X86-LABEL: fold_and_xor_neg_v1_32:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: negl %eax
-; X86-NEXT: xorl %ecx, %eax
-; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: retl
+; X86-NOBMI-LABEL: fold_and_xor_neg_v1_32:
+; X86-NOBMI: # %bb.0:
+; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT: movl %ecx, %eax
+; X86-NOBMI-NEXT: negl %eax
+; X86-NOBMI-NEXT: xorl %ecx, %eax
+; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT: retl
;
-; X64-LABEL: fold_and_xor_neg_v1_32:
-; X64: # %bb.0:
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: negl %eax
-; X64-NEXT: xorl %edi, %eax
-; X64-NEXT: andl %esi, %eax
-; X64-NEXT: retq
+; X86-BMI-LABEL: fold_and_xor_neg_v1_32:
+; X86-BMI: # %bb.0:
+; X86-BMI-NEXT: blsmskl {{[0-9]+}}(%esp), %eax
+; X86-BMI-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax
+; X86-BMI-NEXT: retl
+;
+; X64-NOBMI-LABEL: fold_and_xor_neg_v1_32:
+; X64-NOBMI: # %bb.0:
+; X64-NOBMI-NEXT: movl %edi, %eax
+; X64-NOBMI-NEXT: negl %eax
+; X64-NOBMI-NEXT: xorl %edi, %eax
+; X64-NOBMI-NEXT: andl %esi, %eax
+; X64-NOBMI-NEXT: retq
+;
+; X64-BMI-LABEL: fold_and_xor_neg_v1_32:
+; X64-BMI: # %bb.0:
+; X64-BMI-NEXT: blsmskl %edi, %eax
+; X64-BMI-NEXT: andnl %esi, %eax, %eax
+; X64-BMI-NEXT: retq
%neg = sub i32 0, %x
%xor = xor i32 %x, %neg
%and = and i32 %xor, %y
@@ -30,22 +42,34 @@ define i32 @fold_and_xor_neg_v1_32(i32 %x, i32 %y) nounwind {
}
define i32 @fold_and_xor_neg_v2_32(i32 %x, i32 %y) nounwind {
-; X86-LABEL: fold_and_xor_neg_v2_32:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: negl %eax
-; X86-NEXT: xorl %ecx, %eax
-; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: retl
+; X86-NOBMI-LABEL: fold_and_xor_neg_v2_32:
+; X86-NOBMI: # %bb.0:
+; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT: movl %ecx, %eax
+; X86-NOBMI-NEXT: negl %eax
+; X86-NOBMI-NEXT: xorl %ecx, %eax
+; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT: retl
;
-; X64-LABEL: fold_and_xor_neg_v2_32:
-; X64: # %bb.0:
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: negl %eax
-; X64-NEXT: xorl %edi, %eax
-; X64-NEXT: andl %esi, %eax
-; X64-NEXT: retq
+; X86-BMI-LABEL: fold_and_xor_neg_v2_32:
+; X86-BMI: # %bb.0:
+; X86-BMI-NEXT: blsmskl {{[0-9]+}}(%esp), %eax
+; X86-BMI-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax
+; X86-BMI-NEXT: retl
+;
+; X64-NOBMI-LABEL: fold_and_xor_neg_v2_32:
+; X64-NOBMI: # %bb.0:
+; X64-NOBMI-NEXT: movl %edi, %eax
+; X64-NOBMI-NEXT: negl %eax
+; X64-NOBMI-NEXT: xorl %edi, %eax
+; X64-NOBMI-NEXT: andl %esi, %eax
+; X64-NOBMI-NEXT: retq
+;
+; X64-BMI-LABEL: fold_and_xor_neg_v2_32:
+; X64-BMI: # %bb.0:
+; X64-BMI-NEXT: blsmskl %edi, %eax
+; X64-BMI-NEXT: andnl %esi, %eax, %eax
+; X64-BMI-NEXT: retq
%neg = sub i32 0, %x
%xor = xor i32 %x, %neg
%and = and i32 %y, %xor
@@ -53,22 +77,34 @@ define i32 @fold_and_xor_neg_v2_32(i32 %x, i32 %y) nounwind {
}
define i32 @fold_and_xor_neg_v3_32(i32 %x, i32 %y) nounwind {
-; X86-LABEL: fold_and_xor_neg_v3_32:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: negl %eax
-; X86-NEXT: xorl %ecx, %eax
-; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: retl
+; X86-NOBMI-LABEL: fold_and_xor_neg_v3_32:
+; X86-NOBMI: # %bb.0:
+; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT: movl %ecx, %eax
+; X86-NOBMI-NEXT: negl %eax
+; X86-NOBMI-NEXT: xorl %ecx, %eax
+; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT: retl
;
-; X64-LABEL: fold_and_xor_neg_v3_32:
-; X64: # %bb.0:
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: negl %eax
-; X64-NEXT: xorl %edi, %eax
-; X64-NEXT: andl %esi, %eax
-; X64-NEXT: retq
+; X86-BMI-LABEL: fold_and_xor_neg_v3_32:
+; X86-BMI: # %bb.0:
+; X86-BMI-NEXT: blsmskl {{[0-9]+}}(%esp), %eax
+; X86-BMI-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax
+; X86-BMI-NEXT: retl
+;
+; X64-NOBMI-LABEL: fold_and_xor_neg_v3_32:
+; X64-NOBMI: # %bb.0:
+; X64-NOBMI-NEXT: movl %edi, %eax
+; X64-NOBMI-NEXT: negl %eax
+; X64-NOBMI-NEXT: xorl %edi, %eax
+; X64-NOBMI-NEXT: andl %esi, %eax
+; X64-NOBMI-NEXT: retq
+;
+; X64-BMI-LABEL: fold_and_xor_neg_v3_32:
+; X64-BMI: # %bb.0:
+; X64-BMI-NEXT: blsmskl %edi, %eax
+; X64-BMI-NEXT: andnl %esi, %eax, %eax
+; X64-BMI-NEXT: retq
%neg = sub i32 0, %x
%xor = xor i32 %neg, %x
%and = and i32 %xor, %y
@@ -76,22 +112,34 @@ define i32 @fold_and_xor_neg_v3_32(i32 %x, i32 %y) nounwind {
}
define i32 @fold_and_xor_neg_v4_32(i32 %x, i32 %y) nounwind {
-; X86-LABEL: fold_and_xor_neg_v4_32:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: negl %eax
-; X86-NEXT: xorl %ecx, %eax
-; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: retl
+; X86-NOBMI-LABEL: fold_and_xor_neg_v4_32:
+; X86-NOBMI: # %bb.0:
+; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT: movl %ecx, %eax
+; X86-NOBMI-NEXT: negl %eax
+; X86-NOBMI-NEXT: xorl %ecx, %eax
+; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT: retl
;
-; X64-LABEL: fold_and_xor_neg_v4_32:
-; X64: # %bb.0:
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: negl %eax
-; X64-NEXT: xorl %edi, %eax
-; X64-NEXT: andl %esi, %eax
-; X64-NEXT: retq
+; X86-BMI-LABEL: fold_and_xor_neg_v4_32:
+; X86-BMI: # %bb.0:
+; X86-BMI-NEXT: blsmskl {{[0-9]+}}(%esp), %eax
+; X86-BMI-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax
+; X86-BMI-NEXT: retl
+;
+; X64-NOBMI-LABEL: fold_and_xor_neg_v4_32:
+; X64-NOBMI: # %bb.0:
+; X64-NOBMI-NEXT: movl %edi, %eax
+; X64-NOBMI-NEXT: negl %eax
+; X64-NOBMI-NEXT: xorl %edi, %eax
+; X64-NOBMI-NEXT: andl %esi, %eax
+; X64-NOBMI-NEXT: retq
+;
+; X64-BMI-LABEL: fold_and_xor_neg_v4_32:
+; X64-BMI: # %bb.0:
+; X64-BMI-NEXT: blsmskl %edi, %eax
+; X64-BMI-NEXT: andnl %esi, %eax, %eax
+; X64-BMI-NEXT: retq
%neg = sub i32 0, %x
%xor = xor i32 %neg, %x
%and = and i32 %y, %xor
@@ -115,13 +163,19 @@ define i64 @fold_and_xor_neg_v1_64(i64 %x, i64 %y) nounwind {
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
-; X64-LABEL: fold_and_xor_neg_v1_64:
-; X64: # %bb.0:
-; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: negq %rax
-; X64-NEXT: xorq %rdi, %rax
-; X64-NEXT: andq %rsi, %rax
-; X64-NEXT: retq
+; X64-NOBMI-LABEL: fold_and_xor_neg_v1_64:
+; X64-NOBMI: # %bb.0:
+; X64-NOBMI-NEXT: movq %rdi, %rax
+; X64-NOBMI-NEXT: negq %rax
+; X64-NOBMI-NEXT: xorq %rdi, %rax
+; X64-NOBMI-NEXT: andq %rsi, %rax
+; X64-NOBMI-NEXT: retq
+;
+; X64-BMI-LABEL: fold_and_xor_neg_v1_64:
+; X64-BMI: # %bb.0:
+; X64-BMI-NEXT: blsmskq %rdi, %rax
+; X64-BMI-NEXT: andnq %rsi, %rax, %rax
+; X64-BMI-NEXT: retq
%neg = sub i64 0, %x
%xor = xor i64 %x, %neg
%and = and i64 %xor, %y
@@ -271,8 +325,3 @@ define i32 @fold_and_xor_neg_v1_32_no_blsmsk_negative(i32 %x, i32 %y, i32 %z) no
%and = and i32 %xor, %y
ret i32 %and
}
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; X64-BMI: {{.*}}
-; X64-NOBMI: {{.*}}
-; X86-BMI: {{.*}}
-; X86-NOBMI: {{.*}}