[llvm] fc6bee1 - [SDAG] SimplifyDemandedBits - generalize fold for 2 LSB of X*X
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 7 12:39:02 PST 2022
Author: Sanjay Patel
Date: 2022-02-07T15:38:50-05:00
New Revision: fc6bee1c11d4aa2fc591a9272edbe01dd18a650f
URL: https://github.com/llvm/llvm-project/commit/fc6bee1c11d4aa2fc591a9272edbe01dd18a650f
DIFF: https://github.com/llvm/llvm-project/commit/fc6bee1c11d4aa2fc591a9272edbe01dd18a650f.diff
LOG: [SDAG] SimplifyDemandedBits - generalize fold for 2 LSB of X*X
This is translated from recent changes to the IR version of this function:
D119060
D119139
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
llvm/test/CodeGen/AArch64/combine-mul.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 7c1a66aceb4b5..77f05c51fdaca 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -2265,9 +2265,14 @@ bool TargetLowering::SimplifyDemandedBits(
break;
}
case ISD::MUL:
- // 'Quadratic Reciprocity': mul(x,x) -> 0 if we're only demanding bit[1]
- if (DemandedBits == 2 && Op.getOperand(0) == Op.getOperand(1))
- return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
+ // For a squared value "X * X", the bottom 2 bits are 0 and X[0] because:
+ // X * X is odd iff X is odd.
+ // 'Quadratic Reciprocity': X * X -> 0 for bit[1]
+ if (Op.getOperand(0) == Op.getOperand(1) && DemandedBits.ult(4)) {
+ SDValue One = TLO.DAG.getConstant(1, dl, VT);
+ SDValue And1 = TLO.DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), One);
+ return TLO.CombineTo(Op, And1);
+ }
LLVM_FALLTHROUGH;
case ISD::ADD:
case ISD::SUB: {
diff --git a/llvm/test/CodeGen/AArch64/combine-mul.ll b/llvm/test/CodeGen/AArch64/combine-mul.ll
index 7875d423acd0a..a0ed88c896785 100644
--- a/llvm/test/CodeGen/AArch64/combine-mul.ll
+++ b/llvm/test/CodeGen/AArch64/combine-mul.ll
@@ -108,8 +108,7 @@ define i32 @one_demanded_low_bit(i32 %x) {
define i16 @squared_one_demanded_low_bit(i16 %x) {
; CHECK-LABEL: squared_one_demanded_low_bit:
; CHECK: // %bb.0:
-; CHECK-NEXT: mul w8, w0, w0
-; CHECK-NEXT: and w0, w8, #0x1
+; CHECK-NEXT: and w0, w0, #0x1
; CHECK-NEXT: ret
%mul = mul i16 %x, %x
%and = and i16 %mul, 1
@@ -120,7 +119,6 @@ define <4 x i32> @squared_one_demanded_low_bit_splat(<4 x i32> %x) {
; CHECK-LABEL: squared_one_demanded_low_bit_splat:
; CHECK: // %bb.0:
; CHECK-NEXT: mvni v1.4s, #1
-; CHECK-NEXT: mul v0.4s, v0.4s, v0.4s
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
%mul = mul <4 x i32> %x, %x
@@ -131,8 +129,7 @@ define <4 x i32> @squared_one_demanded_low_bit_splat(<4 x i32> %x) {
define i32 @squared_demanded_2_low_bits(i32 %x) {
; CHECK-LABEL: squared_demanded_2_low_bits:
; CHECK: // %bb.0:
-; CHECK-NEXT: mul w8, w0, w0
-; CHECK-NEXT: and w0, w8, #0x3
+; CHECK-NEXT: and w0, w0, #0x1
; CHECK-NEXT: ret
%mul = mul i32 %x, %x
%and = and i32 %mul, 3
@@ -142,13 +139,7 @@ define i32 @squared_demanded_2_low_bits(i32 %x) {
define <2 x i64> @squared_demanded_2_low_bits_splat(<2 x i64> %x) {
; CHECK-LABEL: squared_demanded_2_low_bits_splat:
; CHECK: // %bb.0:
-; CHECK-NEXT: fmov x8, d0
-; CHECK-NEXT: mov x9, v0.d[1]
-; CHECK-NEXT: mul x8, x8, x8
-; CHECK-NEXT: mul x9, x9, x9
-; CHECK-NEXT: fmov d0, x8
; CHECK-NEXT: mov x8, #-2
-; CHECK-NEXT: mov v0.d[1], x9
; CHECK-NEXT: dup v1.2d, x8
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
More information about the llvm-commits mailing list