[llvm] 50c9f1e - [TargetLowering] Replace Log2_32_Ceil with Log2_32 in SimplifySetCC ctpop combine.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 15 01:05:31 PDT 2020
Author: Craig Topper
Date: 2020-10-15T01:05:07-07:00
New Revision: 50c9f1e11d18aa2a784a2e2f9676e7c3e356f5ad
URL: https://github.com/llvm/llvm-project/commit/50c9f1e11d18aa2a784a2e2f9676e7c3e356f5ad
DIFF: https://github.com/llvm/llvm-project/commit/50c9f1e11d18aa2a784a2e2f9676e7c3e356f5ad.diff
LOG: [TargetLowering] Replace Log2_32_Ceil with Log2_32 in SimplifySetCC ctpop combine.
This combine can look through (trunc (ctpop X)). When doing this
it tries to make sure the trunc doesn't lose any information
from the ctpop. It does this by checking that the truncated type
has more bits than Log2_32_Ceil of the ctpop type. The Ceil is
unnecessary and pessimizes non-power of 2 types.
For example, ctpop of i256 requires 9 bits to represent the max
value of 256. But ctpop of i255 only requires 8 bits to represent
the max result of 255. Log2_32_Ceil of 256 and 255 both return 8,
while Log2_32 returns 8 for 256 and 7 for 255.
The code with popcnt enabled is a regression for this test case,
but it does match what already happens with i256 truncated to i9.
Since power of 2 is more likely, I don't think it should block
this change.
Differential Revision: https://reviews.llvm.org/D89412
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
llvm/test/CodeGen/X86/ctpop-combine.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 287ee3b8d21f..56ead8d79e6f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -3446,7 +3446,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (CTPOP.hasOneUse() && CTPOP.getOpcode() == ISD::CTPOP &&
(N0 == CTPOP ||
- N0.getValueSizeInBits() > Log2_32_Ceil(CTPOP.getValueSizeInBits()))) {
+ N0.getValueSizeInBits() > Log2_32(CTPOP.getValueSizeInBits()))) {
EVT CTVT = CTPOP.getValueType();
SDValue CTOp = CTPOP.getOperand(0);
diff --git a/llvm/test/CodeGen/X86/ctpop-combine.ll b/llvm/test/CodeGen/X86/ctpop-combine.ll
index c3bea4923530..2a686d393824 100644
--- a/llvm/test/CodeGen/X86/ctpop-combine.ll
+++ b/llvm/test/CodeGen/X86/ctpop-combine.ll
@@ -162,96 +162,39 @@ define i32 @ctpop_ne_one(i64 %x) nounwind readnone {
}
define i1 @ctpop_trunc_non_power2(i255 %x) {
-; POPCOUNT-LABEL: ctpop_trunc_non_power2:
-; POPCOUNT: # %bb.0:
-; POPCOUNT-NEXT: movabsq $9223372036854775807, %rax # imm = 0x7FFFFFFFFFFFFFFF
-; POPCOUNT-NEXT: andq %rcx, %rax
-; POPCOUNT-NEXT: popcntq %rsi, %rcx
-; POPCOUNT-NEXT: popcntq %rdi, %rsi
-; POPCOUNT-NEXT: addl %ecx, %esi
-; POPCOUNT-NEXT: popcntq %rax, %rax
-; POPCOUNT-NEXT: popcntq %rdx, %rcx
-; POPCOUNT-NEXT: addl %eax, %ecx
-; POPCOUNT-NEXT: addl %esi, %ecx
-; POPCOUNT-NEXT: cmpb $1, %cl
-; POPCOUNT-NEXT: sete %al
-; POPCOUNT-NEXT: retq
-;
-; NO-POPCOUNT-LABEL: ctpop_trunc_non_power2:
-; NO-POPCOUNT: # %bb.0:
-; NO-POPCOUNT-NEXT: movabsq $9223372036854775807, %rax # imm = 0x7FFFFFFFFFFFFFFF
-; NO-POPCOUNT-NEXT: andq %rcx, %rax
-; NO-POPCOUNT-NEXT: movq %rsi, %rcx
-; NO-POPCOUNT-NEXT: shrq %rcx
-; NO-POPCOUNT-NEXT: movabsq $6148914691236517205, %r8 # imm = 0x5555555555555555
-; NO-POPCOUNT-NEXT: andq %r8, %rcx
-; NO-POPCOUNT-NEXT: subq %rcx, %rsi
-; NO-POPCOUNT-NEXT: movabsq $3689348814741910323, %r11 # imm = 0x3333333333333333
-; NO-POPCOUNT-NEXT: movq %rsi, %rcx
-; NO-POPCOUNT-NEXT: andq %r11, %rcx
-; NO-POPCOUNT-NEXT: shrq $2, %rsi
-; NO-POPCOUNT-NEXT: andq %r11, %rsi
-; NO-POPCOUNT-NEXT: addq %rcx, %rsi
-; NO-POPCOUNT-NEXT: movq %rsi, %rcx
-; NO-POPCOUNT-NEXT: shrq $4, %rcx
-; NO-POPCOUNT-NEXT: addq %rsi, %rcx
-; NO-POPCOUNT-NEXT: movabsq $1085102592571150095, %r9 # imm = 0xF0F0F0F0F0F0F0F
-; NO-POPCOUNT-NEXT: andq %r9, %rcx
-; NO-POPCOUNT-NEXT: movabsq $72340172838076673, %r10 # imm = 0x101010101010101
-; NO-POPCOUNT-NEXT: imulq %r10, %rcx
-; NO-POPCOUNT-NEXT: shrq $56, %rcx
-; NO-POPCOUNT-NEXT: movq %rdi, %rsi
-; NO-POPCOUNT-NEXT: shrq %rsi
-; NO-POPCOUNT-NEXT: andq %r8, %rsi
-; NO-POPCOUNT-NEXT: subq %rsi, %rdi
-; NO-POPCOUNT-NEXT: movq %rdi, %rsi
-; NO-POPCOUNT-NEXT: andq %r11, %rsi
-; NO-POPCOUNT-NEXT: shrq $2, %rdi
-; NO-POPCOUNT-NEXT: andq %r11, %rdi
-; NO-POPCOUNT-NEXT: addq %rsi, %rdi
-; NO-POPCOUNT-NEXT: movq %rdi, %rsi
-; NO-POPCOUNT-NEXT: shrq $4, %rsi
-; NO-POPCOUNT-NEXT: addq %rdi, %rsi
-; NO-POPCOUNT-NEXT: andq %r9, %rsi
-; NO-POPCOUNT-NEXT: imulq %r10, %rsi
-; NO-POPCOUNT-NEXT: shrq $56, %rsi
-; NO-POPCOUNT-NEXT: addl %ecx, %esi
-; NO-POPCOUNT-NEXT: movq %rax, %rcx
-; NO-POPCOUNT-NEXT: shrq %rcx
-; NO-POPCOUNT-NEXT: movabsq $1537228672809129301, %rdi # imm = 0x1555555555555555
-; NO-POPCOUNT-NEXT: andq %rcx, %rdi
-; NO-POPCOUNT-NEXT: subq %rdi, %rax
-; NO-POPCOUNT-NEXT: movq %rax, %rcx
-; NO-POPCOUNT-NEXT: andq %r11, %rcx
-; NO-POPCOUNT-NEXT: shrq $2, %rax
-; NO-POPCOUNT-NEXT: andq %r11, %rax
-; NO-POPCOUNT-NEXT: addq %rcx, %rax
-; NO-POPCOUNT-NEXT: movq %rax, %rcx
-; NO-POPCOUNT-NEXT: shrq $4, %rcx
-; NO-POPCOUNT-NEXT: addq %rax, %rcx
-; NO-POPCOUNT-NEXT: andq %r9, %rcx
-; NO-POPCOUNT-NEXT: imulq %r10, %rcx
-; NO-POPCOUNT-NEXT: shrq $56, %rcx
-; NO-POPCOUNT-NEXT: movq %rdx, %rax
-; NO-POPCOUNT-NEXT: shrq %rax
-; NO-POPCOUNT-NEXT: andq %r8, %rax
-; NO-POPCOUNT-NEXT: subq %rax, %rdx
-; NO-POPCOUNT-NEXT: movq %rdx, %rax
-; NO-POPCOUNT-NEXT: andq %r11, %rax
-; NO-POPCOUNT-NEXT: shrq $2, %rdx
-; NO-POPCOUNT-NEXT: andq %r11, %rdx
-; NO-POPCOUNT-NEXT: addq %rax, %rdx
-; NO-POPCOUNT-NEXT: movq %rdx, %rax
-; NO-POPCOUNT-NEXT: shrq $4, %rax
-; NO-POPCOUNT-NEXT: addq %rdx, %rax
-; NO-POPCOUNT-NEXT: andq %r9, %rax
-; NO-POPCOUNT-NEXT: imulq %r10, %rax
-; NO-POPCOUNT-NEXT: shrq $56, %rax
-; NO-POPCOUNT-NEXT: addl %ecx, %eax
-; NO-POPCOUNT-NEXT: addl %esi, %eax
-; NO-POPCOUNT-NEXT: cmpb $1, %al
-; NO-POPCOUNT-NEXT: sete %al
-; NO-POPCOUNT-NEXT: retq
+; CHECK-LABEL: ctpop_trunc_non_power2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset %rbx, -16
+; CHECK-NEXT: movabsq $9223372036854775807, %r8 # imm = 0x7FFFFFFFFFFFFFFF
+; CHECK-NEXT: movq %rcx, %r9
+; CHECK-NEXT: andq %r8, %r9
+; CHECK-NEXT: movq %rdi, %r11
+; CHECK-NEXT: addq $-1, %r11
+; CHECK-NEXT: movq %rsi, %r10
+; CHECK-NEXT: adcq $-1, %r10
+; CHECK-NEXT: movq %rdx, %rax
+; CHECK-NEXT: adcq $-1, %rax
+; CHECK-NEXT: movq %rcx, %rbx
+; CHECK-NEXT: adcq %r8, %rbx
+; CHECK-NEXT: andq %rdi, %r11
+; CHECK-NEXT: andq %rdx, %rax
+; CHECK-NEXT: orq %r11, %rax
+; CHECK-NEXT: andq %rsi, %r10
+; CHECK-NEXT: andq %r8, %rbx
+; CHECK-NEXT: andq %rcx, %rbx
+; CHECK-NEXT: orq %r10, %rbx
+; CHECK-NEXT: orq %rax, %rbx
+; CHECK-NEXT: sete %cl
+; CHECK-NEXT: orq %rdx, %rdi
+; CHECK-NEXT: orq %rsi, %r9
+; CHECK-NEXT: orq %rdi, %r9
+; CHECK-NEXT: setne %al
+; CHECK-NEXT: andb %cl, %al
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
%a = call i255 @llvm.ctpop.i255(i255 %x)
%b = trunc i255 %a to i8 ; largest value from ctpop is 255, fits in 8 bits.
%c = icmp eq i8 %b, 1
More information about the llvm-commits
mailing list