[llvm] d1ecfaa - [SDAG] try to fold one-demanded-bit-of-multiply
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 7 14:28:35 PST 2022
Author: Sanjay Patel
Date: 2022-02-07T17:24:35-05:00
New Revision: d1ecfaa097b1b5602c778acccbd687173ac434e8
URL: https://github.com/llvm/llvm-project/commit/d1ecfaa097b1b5602c778acccbd687173ac434e8
DIFF: https://github.com/llvm/llvm-project/commit/d1ecfaa097b1b5602c778acccbd687173ac434e8.diff
LOG: [SDAG] try to fold one-demanded-bit-of-multiply
This is a SelectionDAG translation of the transform added to InstCombine in D118539.
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
llvm/test/CodeGen/AArch64/combine-mul.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 77f05c51fdaca..72f14b4568829 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -2265,6 +2265,19 @@ bool TargetLowering::SimplifyDemandedBits(
break;
}
case ISD::MUL:
+ if (DemandedBits.isPowerOf2()) {
+ // The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1.
+ // If we demand exactly one bit N and we have "X * (C' << N)" where C' is
+ // odd (has LSB set), then the left-shifted low bit of X is the answer.
+ unsigned CTZ = DemandedBits.countTrailingZeros();
+ ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
+ if (C && C->getAPIntValue().countTrailingZeros() == CTZ) {
+ EVT ShiftAmtTy = getShiftAmountTy(VT, TLO.DAG.getDataLayout());
+ SDValue AmtC = TLO.DAG.getConstant(CTZ, dl, ShiftAmtTy);
+ SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, Op.getOperand(0), AmtC);
+ return TLO.CombineTo(Op, Shl);
+ }
+ }
// For a squared value "X * X", the bottom 2 bits are 0 and X[0] because:
// X * X is odd iff X is odd.
// 'Quadratic Reciprocity': X * X -> 0 for bit[1]
diff --git a/llvm/test/CodeGen/AArch64/combine-mul.ll b/llvm/test/CodeGen/AArch64/combine-mul.ll
index a0ed88c896785..a2b0425308093 100644
--- a/llvm/test/CodeGen/AArch64/combine-mul.ll
+++ b/llvm/test/CodeGen/AArch64/combine-mul.ll
@@ -66,7 +66,7 @@ define <4 x i32> @combine_mul_self_demandedbits_vector(<4 x i32> %x) {
define i8 @one_demanded_bit(i8 %x) {
; CHECK-LABEL: one_demanded_bit:
; CHECK: // %bb.0:
-; CHECK-NEXT: neg w8, w0, lsl #6
+; CHECK-NEXT: lsl w8, w0, #6
; CHECK-NEXT: orr w0, w8, #0xffffffbf
; CHECK-NEXT: ret
%m = mul i8 %x, 192 ; 0b1100_0000
@@ -77,16 +77,9 @@ define i8 @one_demanded_bit(i8 %x) {
define <2 x i64> @one_demanded_bit_splat(<2 x i64> %x) {
; CHECK-LABEL: one_demanded_bit_splat:
; CHECK: // %bb.0:
-; CHECK-NEXT: fmov x8, d0
-; CHECK-NEXT: mov x9, v0.d[1]
-; CHECK-NEXT: add x8, x8, x8, lsl #2
-; CHECK-NEXT: lsl x8, x8, #5
-; CHECK-NEXT: add x9, x9, x9, lsl #2
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: lsl x8, x9, #5
-; CHECK-NEXT: mov w9, #32
-; CHECK-NEXT: mov v0.d[1], x8
-; CHECK-NEXT: dup v1.2d, x9
+; CHECK-NEXT: mov w8, #32
+; CHECK-NEXT: shl v0.2d, v0.2d, #5
+; CHECK-NEXT: dup v1.2d, x8
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
%m = mul <2 x i64> %x, <i64 160, i64 160> ; 0b1010_0000
@@ -97,8 +90,7 @@ define <2 x i64> @one_demanded_bit_splat(<2 x i64> %x) {
define i32 @one_demanded_low_bit(i32 %x) {
; CHECK-LABEL: one_demanded_low_bit:
; CHECK: // %bb.0:
-; CHECK-NEXT: neg w8, w0
-; CHECK-NEXT: and w0, w8, #0x1
+; CHECK-NEXT: and w0, w0, #0x1
; CHECK-NEXT: ret
%m = mul i32 %x, -63 ; any odd number will do
%r = and i32 %m, 1
More information about the llvm-commits mailing list