[llvm] fc6bee1 - [SDAG] SimplifyDemandedBits - generalize fold for 2 LSB of X*X

Sanjay Patel via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 7 12:39:02 PST 2022


Author: Sanjay Patel
Date: 2022-02-07T15:38:50-05:00
New Revision: fc6bee1c11d4aa2fc591a9272edbe01dd18a650f

URL: https://github.com/llvm/llvm-project/commit/fc6bee1c11d4aa2fc591a9272edbe01dd18a650f
DIFF: https://github.com/llvm/llvm-project/commit/fc6bee1c11d4aa2fc591a9272edbe01dd18a650f.diff

LOG: [SDAG] SimplifyDemandedBits - generalize fold for 2 LSB of X*X

This is translated from recent changes to the IR version of this function:
D119060
D119139

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
    llvm/test/CodeGen/AArch64/combine-mul.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 7c1a66aceb4b5..77f05c51fdaca 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -2265,9 +2265,14 @@ bool TargetLowering::SimplifyDemandedBits(
     break;
   }
   case ISD::MUL:
-    // 'Quadratic Reciprocity': mul(x,x) -> 0 if we're only demanding bit[1]
-    if (DemandedBits == 2 && Op.getOperand(0) == Op.getOperand(1))
-      return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
+    // For a squared value "X * X", the bottom 2 bits are 0 and X[0] because:
+    // X * X is odd iff X is odd.
+    // 'Quadratic Reciprocity': X * X -> 0 for bit[1]
+    if (Op.getOperand(0) == Op.getOperand(1) && DemandedBits.ult(4)) {
+      SDValue One = TLO.DAG.getConstant(1, dl, VT);
+      SDValue And1 = TLO.DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), One);
+      return TLO.CombineTo(Op, And1);
+    }
     LLVM_FALLTHROUGH;
   case ISD::ADD:
   case ISD::SUB: {

diff --git a/llvm/test/CodeGen/AArch64/combine-mul.ll b/llvm/test/CodeGen/AArch64/combine-mul.ll
index 7875d423acd0a..a0ed88c896785 100644
--- a/llvm/test/CodeGen/AArch64/combine-mul.ll
+++ b/llvm/test/CodeGen/AArch64/combine-mul.ll
@@ -108,8 +108,7 @@ define i32 @one_demanded_low_bit(i32 %x) {
 define i16 @squared_one_demanded_low_bit(i16 %x) {
 ; CHECK-LABEL: squared_one_demanded_low_bit:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mul w8, w0, w0
-; CHECK-NEXT:    and w0, w8, #0x1
+; CHECK-NEXT:    and w0, w0, #0x1
 ; CHECK-NEXT:    ret
   %mul = mul i16 %x, %x
   %and = and i16 %mul, 1
@@ -120,7 +119,6 @@ define <4 x i32> @squared_one_demanded_low_bit_splat(<4 x i32> %x) {
 ; CHECK-LABEL: squared_one_demanded_low_bit_splat:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mvni v1.4s, #1
-; CHECK-NEXT:    mul v0.4s, v0.4s, v0.4s
 ; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT:    ret
   %mul = mul <4 x i32> %x, %x
@@ -131,8 +129,7 @@ define <4 x i32> @squared_one_demanded_low_bit_splat(<4 x i32> %x) {
 define i32 @squared_demanded_2_low_bits(i32 %x) {
 ; CHECK-LABEL: squared_demanded_2_low_bits:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mul w8, w0, w0
-; CHECK-NEXT:    and w0, w8, #0x3
+; CHECK-NEXT:    and w0, w0, #0x1
 ; CHECK-NEXT:    ret
   %mul = mul i32 %x, %x
   %and = and i32 %mul, 3
@@ -142,13 +139,7 @@ define i32 @squared_demanded_2_low_bits(i32 %x) {
 define <2 x i64> @squared_demanded_2_low_bits_splat(<2 x i64> %x) {
 ; CHECK-LABEL: squared_demanded_2_low_bits_splat:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    fmov x8, d0
-; CHECK-NEXT:    mov x9, v0.d[1]
-; CHECK-NEXT:    mul x8, x8, x8
-; CHECK-NEXT:    mul x9, x9, x9
-; CHECK-NEXT:    fmov d0, x8
 ; CHECK-NEXT:    mov x8, #-2
-; CHECK-NEXT:    mov v0.d[1], x9
 ; CHECK-NEXT:    dup v1.2d, x8
 ; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT:    ret


        


More information about the llvm-commits mailing list