[llvm-branch-commits] [llvm] 1307e3f - [TargetLowering] Add icmp ne/eq (srl (ctlz x), log2(bw)) vector support.
Simon Pilgrim via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Wed Jan 6 08:17:56 PST 2021
Author: Simon Pilgrim
Date: 2021-01-06T16:13:51Z
New Revision: 1307e3f6c46cc3a6e6ad9cd46fc67efafcac939e
URL: https://github.com/llvm/llvm-project/commit/1307e3f6c46cc3a6e6ad9cd46fc67efafcac939e
DIFF: https://github.com/llvm/llvm-project/commit/1307e3f6c46cc3a6e6ad9cd46fc67efafcac939e.diff
LOG: [TargetLowering] Add icmp ne/eq (srl (ctlz x), log2(bw)) vector support.
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
llvm/test/CodeGen/X86/lzcnt-cmp.ll
Removed:
################################################################################
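A note on the fold being extended, before the diff: for an i64 element, ctlz
returns the bit width (64) only when the input is zero, so shifting the count
right by log2(64) = 6 yields 1 exactly for zero inputs and 0 otherwise. The
equality compare against 0/1 is therefore just a zero-test of x itself. In IR
terms, a minimal sketch mirroring the modified test below (illustrative, not
code from the patch itself):

  ; before the fold
  %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %x, i1 false)
  %lshr = lshr <2 x i64> %ctlz, <i64 6, i64 6>
  %cmp  = icmp eq <2 x i64> %lshr, zeroinitializer
  ; after the fold
  %cmp  = icmp ne <2 x i64> %x, zeroinitializer
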
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index d895a53e5a83..f5abb2c513fb 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -3486,35 +3486,36 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// Optimize some CTPOP cases.
if (SDValue V = simplifySetCCWithCTPOP(*this, VT, N0, C1, Cond, dl, DAG))
return V;
- }
-
- // FIXME: Support vectors.
- if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
- const APInt &C1 = N1C->getAPIntValue();
// If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
// equality comparison, then we're just comparing whether X itself is
// zero.
if (N0.getOpcode() == ISD::SRL && (C1.isNullValue() || C1.isOneValue()) &&
N0.getOperand(0).getOpcode() == ISD::CTLZ &&
- N0.getOperand(1).getOpcode() == ISD::Constant) {
- const APInt &ShAmt = N0.getConstantOperandAPInt(1);
- if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
- ShAmt == Log2_32(N0.getValueSizeInBits())) {
- if ((C1 == 0) == (Cond == ISD::SETEQ)) {
- // (srl (ctlz x), 5) == 0 -> X != 0
- // (srl (ctlz x), 5) != 1 -> X != 0
- Cond = ISD::SETNE;
- } else {
- // (srl (ctlz x), 5) != 0 -> X == 0
- // (srl (ctlz x), 5) == 1 -> X == 0
- Cond = ISD::SETEQ;
+ isPowerOf2_32(N0.getScalarValueSizeInBits())) {
+ if (ConstantSDNode *ShAmt = isConstOrConstSplat(N0.getOperand(1))) {
+ if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ ShAmt->getAPIntValue() == Log2_32(N0.getScalarValueSizeInBits())) {
+ if ((C1 == 0) == (Cond == ISD::SETEQ)) {
+ // (srl (ctlz x), 5) == 0 -> X != 0
+ // (srl (ctlz x), 5) != 1 -> X != 0
+ Cond = ISD::SETNE;
+ } else {
+ // (srl (ctlz x), 5) != 0 -> X == 0
+ // (srl (ctlz x), 5) == 1 -> X == 0
+ Cond = ISD::SETEQ;
+ }
+ SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
+ return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0), Zero,
+ Cond);
}
- SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
- return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0),
- Zero, Cond);
}
}
+ }
+
+ // FIXME: Support vectors.
+ if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
+ const APInt &C1 = N1C->getAPIntValue();
// (zext x) == C --> x == (trunc C)
// (sext x) == C --> x == (trunc C)
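
The scalar form of this combine already existed; the change above hoists it out
of the scalar-only ConstantSDNode block, switches to isConstOrConstSplat and
getScalarValueSizeInBits so splat-constant vector shift amounts match too, and
guards on a power-of-two bit width so Log2_32 is exact. As a sanity check on
the underlying identity, a standalone C++20 sketch (illustrative only, not part
of the patch; std::countl_zero, unlike ctlz with undef-on-zero, is defined for
a zero input):

  #include <bit>
  #include <cassert>
  #include <cstdint>

  int main() {
    for (uint64_t X : {UINT64_C(0), UINT64_C(1), UINT64_C(42), UINT64_MAX}) {
      // log2(64) == 6; std::countl_zero(0) returns the full width, 64.
      int Shifted = std::countl_zero(X) >> 6;
      assert((Shifted == 1) == (X == 0)); // (srl (ctlz x), 6) == 1  <=>  x == 0
      assert((Shifted == 0) == (X != 0)); // (srl (ctlz x), 6) == 0  <=>  x != 0
    }
  }
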
diff --git a/llvm/test/CodeGen/X86/lzcnt-cmp.ll b/llvm/test/CodeGen/X86/lzcnt-cmp.ll
index 435b09dd5d08..3823524f552a 100644
--- a/llvm/test/CodeGen/X86/lzcnt-cmp.ll
+++ b/llvm/test/CodeGen/X86/lzcnt-cmp.ll
@@ -96,75 +96,36 @@ define i1 @lshr_ctlz_undef_cmpne_zero_i64(i64 %in) {
define <2 x i64> @lshr_ctlz_cmpeq_zero_v2i64(<2 x i64> %in) {
; X86-LABEL: lshr_ctlz_cmpeq_zero_v2i64:
; X86: # %bb.0:
+; X86-NEXT: pushl %esi
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: .cfi_offset %esi, -8
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: xorl %ecx, %ecx
+; X86-NEXT: orl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: setne %cl
+; X86-NEXT: negl %ecx
; X86-NEXT: xorl %edx, %edx
-; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, %ecx
-; X86-NEXT: jne .LBB4_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: lzcntl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: addl $32, %ecx
-; X86-NEXT: .LBB4_2:
-; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: jne .LBB4_4
-; X86-NEXT: # %bb.3:
-; X86-NEXT: lzcntl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: addl $32, %edx
-; X86-NEXT: .LBB4_4:
-; X86-NEXT: andl $-64, %edx
-; X86-NEXT: cmpl $1, %edx
-; X86-NEXT: sbbl %edx, %edx
-; X86-NEXT: andl $-64, %ecx
-; X86-NEXT: cmpl $1, %ecx
-; X86-NEXT: sbbl %ecx, %ecx
-; X86-NEXT: movl %ecx, 12(%eax)
-; X86-NEXT: movl %ecx, 8(%eax)
-; X86-NEXT: movl %edx, 4(%eax)
-; X86-NEXT: movl %edx, (%eax)
+; X86-NEXT: orl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: setne %dl
+; X86-NEXT: negl %edx
+; X86-NEXT: movl %edx, 12(%eax)
+; X86-NEXT: movl %edx, 8(%eax)
+; X86-NEXT: movl %ecx, 4(%eax)
+; X86-NEXT: movl %ecx, (%eax)
+; X86-NEXT: popl %esi
+; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl $4
;
; X64-LABEL: lshr_ctlz_cmpeq_zero_v2i64:
; X64: # %bb.0:
-; X64-NEXT: movdqa %xmm0, %xmm1
-; X64-NEXT: psrlq $1, %xmm1
-; X64-NEXT: por %xmm0, %xmm1
-; X64-NEXT: movdqa %xmm1, %xmm0
-; X64-NEXT: psrlq $2, %xmm0
-; X64-NEXT: por %xmm1, %xmm0
-; X64-NEXT: movdqa %xmm0, %xmm1
-; X64-NEXT: psrlq $4, %xmm1
-; X64-NEXT: por %xmm0, %xmm1
-; X64-NEXT: movdqa %xmm1, %xmm0
-; X64-NEXT: psrlq $8, %xmm0
-; X64-NEXT: por %xmm1, %xmm0
-; X64-NEXT: movdqa %xmm0, %xmm1
-; X64-NEXT: psrlq $16, %xmm1
-; X64-NEXT: por %xmm0, %xmm1
-; X64-NEXT: movdqa %xmm1, %xmm0
-; X64-NEXT: psrlq $32, %xmm0
-; X64-NEXT: por %xmm1, %xmm0
-; X64-NEXT: pcmpeqd %xmm1, %xmm1
-; X64-NEXT: pxor %xmm0, %xmm1
-; X64-NEXT: movdqa %xmm1, %xmm0
-; X64-NEXT: psrlw $1, %xmm0
-; X64-NEXT: pand {{.*}}(%rip), %xmm0
-; X64-NEXT: psubb %xmm0, %xmm1
-; X64-NEXT: movdqa {{.*#+}} xmm0 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
-; X64-NEXT: movdqa %xmm1, %xmm2
-; X64-NEXT: pand %xmm0, %xmm2
-; X64-NEXT: psrlw $2, %xmm1
-; X64-NEXT: pand %xmm0, %xmm1
-; X64-NEXT: paddb %xmm2, %xmm1
-; X64-NEXT: movdqa %xmm1, %xmm2
-; X64-NEXT: psrlw $4, %xmm2
-; X64-NEXT: paddb %xmm1, %xmm2
-; X64-NEXT: pand {{.*}}(%rip), %xmm2
-; X64-NEXT: pxor %xmm0, %xmm0
-; X64-NEXT: psadbw %xmm0, %xmm2
-; X64-NEXT: psrlq $6, %xmm2
-; X64-NEXT: pcmpeqd %xmm0, %xmm2
-; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,0,3,2]
-; X64-NEXT: pand %xmm2, %xmm0
+; X64-NEXT: pxor %xmm1, %xmm1
+; X64-NEXT: pcmpeqd %xmm0, %xmm1
+; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,0,3,2]
+; X64-NEXT: pand %xmm1, %xmm2
+; X64-NEXT: pcmpeqd %xmm0, %xmm0
+; X64-NEXT: pxor %xmm2, %xmm0
; X64-NEXT: retq
%ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %in, i1 0)
%lshr = lshr <2 x i64> %ctlz, <i64 6, i64 6>
@@ -176,76 +137,34 @@ define <2 x i64> @lshr_ctlz_cmpeq_zero_v2i64(<2 x i64> %in) {
define <2 x i64> @lshr_ctlz_cmpne_zero_v2i64(<2 x i64> %in) {
; X86-LABEL: lshr_ctlz_cmpne_zero_v2i64:
; X86: # %bb.0:
+; X86-NEXT: pushl %esi
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: .cfi_offset %esi, -8
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: xorl %ecx, %ecx
+; X86-NEXT: orl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: sete %cl
+; X86-NEXT: negl %ecx
; X86-NEXT: xorl %edx, %edx
-; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl $0, %ecx
-; X86-NEXT: jne .LBB5_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: lzcntl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: addl $32, %ecx
-; X86-NEXT: .LBB5_2:
-; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp)
-; X86-NEXT: jne .LBB5_4
-; X86-NEXT: # %bb.3:
-; X86-NEXT: lzcntl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: addl $32, %edx
-; X86-NEXT: .LBB5_4:
-; X86-NEXT: andl $-64, %edx
+; X86-NEXT: orl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: sete %dl
; X86-NEXT: negl %edx
-; X86-NEXT: sbbl %edx, %edx
-; X86-NEXT: andl $-64, %ecx
-; X86-NEXT: negl %ecx
-; X86-NEXT: sbbl %ecx, %ecx
-; X86-NEXT: movl %ecx, 12(%eax)
-; X86-NEXT: movl %ecx, 8(%eax)
-; X86-NEXT: movl %edx, 4(%eax)
-; X86-NEXT: movl %edx, (%eax)
+; X86-NEXT: movl %edx, 12(%eax)
+; X86-NEXT: movl %edx, 8(%eax)
+; X86-NEXT: movl %ecx, 4(%eax)
+; X86-NEXT: movl %ecx, (%eax)
+; X86-NEXT: popl %esi
+; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl $4
;
; X64-LABEL: lshr_ctlz_cmpne_zero_v2i64:
; X64: # %bb.0:
-; X64-NEXT: movdqa %xmm0, %xmm1
-; X64-NEXT: psrlq $1, %xmm1
-; X64-NEXT: por %xmm0, %xmm1
-; X64-NEXT: movdqa %xmm1, %xmm0
-; X64-NEXT: psrlq $2, %xmm0
-; X64-NEXT: por %xmm1, %xmm0
-; X64-NEXT: movdqa %xmm0, %xmm1
-; X64-NEXT: psrlq $4, %xmm1
-; X64-NEXT: por %xmm0, %xmm1
-; X64-NEXT: movdqa %xmm1, %xmm0
-; X64-NEXT: psrlq $8, %xmm0
-; X64-NEXT: por %xmm1, %xmm0
-; X64-NEXT: movdqa %xmm0, %xmm1
-; X64-NEXT: psrlq $16, %xmm1
-; X64-NEXT: por %xmm0, %xmm1
-; X64-NEXT: movdqa %xmm1, %xmm2
-; X64-NEXT: psrlq $32, %xmm2
-; X64-NEXT: por %xmm1, %xmm2
-; X64-NEXT: pcmpeqd %xmm1, %xmm1
-; X64-NEXT: pxor %xmm1, %xmm2
-; X64-NEXT: movdqa %xmm2, %xmm0
-; X64-NEXT: psrlw $1, %xmm0
-; X64-NEXT: pand {{.*}}(%rip), %xmm0
-; X64-NEXT: psubb %xmm0, %xmm2
-; X64-NEXT: movdqa {{.*#+}} xmm0 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
-; X64-NEXT: movdqa %xmm2, %xmm3
-; X64-NEXT: pand %xmm0, %xmm3
-; X64-NEXT: psrlw $2, %xmm2
-; X64-NEXT: pand %xmm0, %xmm2
-; X64-NEXT: paddb %xmm3, %xmm2
-; X64-NEXT: movdqa %xmm2, %xmm0
-; X64-NEXT: psrlw $4, %xmm0
-; X64-NEXT: paddb %xmm2, %xmm0
-; X64-NEXT: pand {{.*}}(%rip), %xmm0
-; X64-NEXT: pxor %xmm2, %xmm2
-; X64-NEXT: psadbw %xmm2, %xmm0
-; X64-NEXT: psrlq $6, %xmm0
-; X64-NEXT: pcmpeqd %xmm2, %xmm0
-; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,0,3,2]
-; X64-NEXT: pand %xmm2, %xmm0
-; X64-NEXT: pxor %xmm1, %xmm0
+; X64-NEXT: pxor %xmm1, %xmm1
+; X64-NEXT: pcmpeqd %xmm0, %xmm1
+; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
+; X64-NEXT: pand %xmm1, %xmm0
; X64-NEXT: retq
%ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %in, i1 0)
%lshr = lshr <2 x i64> %ctlz, <i64 6, i64 6>
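
For readers following the new X64 output above, one possible reading of the
improved SSE2 sequence (assuming a baseline without 64-bit PCMPEQQ, so the
v2i64 zero-test is assembled from 32-bit compares):

  pxor    %xmm1, %xmm1        # xmm1 = 0
  pcmpeqd %xmm0, %xmm1        # dword lanes: all-ones where the input dword == 0
  pshufd  $177, %xmm1, %xmm2  # imm 0xB1 = [1,0,3,2]: swap the dwords in each qword
  pand    %xmm1, %xmm2        # a qword is all-ones only if both of its dwords were 0
  pcmpeqd %xmm0, %xmm0        # materialize all-ones
  pxor    %xmm2, %xmm0        # invert: the cmpeq-zero test folds to an x != 0 mask

The cmpne variant omits the final inversion, since (srl (ctlz x), 6) != 0 folds
directly to x == 0.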