[llvm] 2d1390e - [DAG] SimplifyDemandedBits - mul(x,x) - if only demand bit[1] then fold to zero
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 31 04:01:08 PST 2022
Author: Simon Pilgrim
Date: 2022-01-31T12:00:51Z
New Revision: 2d1390efbe610ff15a8cfc6d40f6e8eaa74355b6
URL: https://github.com/llvm/llvm-project/commit/2d1390efbe610ff15a8cfc6d40f6e8eaa74355b6
DIFF: https://github.com/llvm/llvm-project/commit/2d1390efbe610ff15a8cfc6d40f6e8eaa74355b6.diff
LOG: [DAG] SimplifyDemandedBits - mul(x,x) - if only demand bit[1] then fold to zero
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
llvm/test/CodeGen/X86/combine-mul.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index a98c21f16c712..ba6cae00bc50e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -2247,8 +2247,12 @@ bool TargetLowering::SimplifyDemandedBits(
}
break;
}
- case ISD::ADD:
case ISD::MUL:
+ // 'Quadratic Reciprocity': mul(x,x) -> 0 if we're only demanding bit[1]
+ if (DemandedBits == 2 && Op.getOperand(0) == Op.getOperand(1))
+ return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
+ LLVM_FALLTHROUGH;
+ case ISD::ADD:
case ISD::SUB: {
// Add, Sub, and Mul don't demand any bits in positions beyond that
// of the highest bit demanded of them.
diff --git a/llvm/test/CodeGen/X86/combine-mul.ll b/llvm/test/CodeGen/X86/combine-mul.ll
index f0254e784cfc6..403443da60ee3 100644
--- a/llvm/test/CodeGen/X86/combine-mul.ll
+++ b/llvm/test/CodeGen/X86/combine-mul.ll
@@ -366,16 +366,12 @@ define <2 x i64> @combine_mul_to_abs_v2i64(<2 x i64> %x) {
define i64 @combine_mul_self_knownbits(i64 %x) {
; SSE-LABEL: combine_mul_self_knownbits:
; SSE: # %bb.0:
-; SSE-NEXT: movq %rdi, %rax
-; SSE-NEXT: imull %eax, %eax
-; SSE-NEXT: andl $2, %eax
+; SSE-NEXT: xorl %eax, %eax
; SSE-NEXT: retq
;
; AVX-LABEL: combine_mul_self_knownbits:
; AVX: # %bb.0:
-; AVX-NEXT: movq %rdi, %rax
-; AVX-NEXT: imull %eax, %eax
-; AVX-NEXT: andl $2, %eax
+; AVX-NEXT: xorl %eax, %eax
; AVX-NEXT: retq
%1 = mul i64 %x, %x
%2 = and i64 %1, 2
@@ -385,15 +381,12 @@ define i64 @combine_mul_self_knownbits(i64 %x) {
define <4 x i32> @combine_mul_self_knownbits_vector(<4 x i32> %x) {
; SSE-LABEL: combine_mul_self_knownbits_vector:
; SSE: # %bb.0:
-; SSE-NEXT: pmulld %xmm0, %xmm0
-; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_mul_self_knownbits_vector:
; AVX: # %bb.0:
-; AVX-NEXT: vpmulld %xmm0, %xmm0, %xmm0
-; AVX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2,2,2,2]
-; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
%1 = mul <4 x i32> %x, %x
%2 = and <4 x i32> %1, <i32 2, i32 2, i32 2, i32 2>
More information about the llvm-commits mailing list