[llvm] a3ada63 - [DAGCombiner] Combine shifts into multiply-high

Tue Jun 2 13:23:15 PDT 2020

Author: Amy Kwan
Date: 2020-06-02T15:22:48-05:00
New Revision: a3ada630d8abd00930db1c2822427be2301a489e

URL: https://github.com/llvm/llvm-project/commit/a3ada630d8abd00930db1c2822427be2301a489e
DIFF: https://github.com/llvm/llvm-project/commit/a3ada630d8abd00930db1c2822427be2301a489e.diff

LOG: [DAGCombiner] Combine shifts into multiply-high

This patch implements a target independent DAG combine to produce multiply-high
instructions from shifts. This DAG combine will combine shifts for any type as
long as the MULH on the narrow type is legal.

For now, it is enabled on PowerPC as PowerPC is the only target that has an
implementation of the isMulhCheaperThanMulShift TLI hook introduced in
D78271.

Moreover, this DAG combine focuses on catching the pattern:
(shift (mul (ext <narrow_type>:$a to <wide_type>), (ext <narrow_type>:$b to <wide_type>)), <narrow_width>)
to produce mulhs when we have a sign-extend, and mulhu when we have
a zero-extend.

The patch performs the following checks:
- Operation is a right shift arithmetic (sra) or logical (srl)
- Input to the shift is a multiply
- Both operands to the shift are sext/zext nodes
- The extends into the multiply are both the same
- The narrow type is half the width of the wide type
- The shift amount is the width of the narrow type
- The respective mulh operation is legal

Differential Revision: https://reviews.llvm.org/D78272

Added: 
    llvm/test/CodeGen/PowerPC/combine-to-mulh-shift-amount.ll
    llvm/test/CodeGen/PowerPC/mul-high.ll

Modified: 
    llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 5fb7aa0e98d4..707a8fd6c35b 100644

--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -7911,6 +7911,77 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
   return SDValue();
 }
 
+// Transform a right shift of a multiply into a multiply-high.
+// Examples:
+// (srl (mul (zext i32:$a to i64), (zext i32:$a to i64)), 32) -> (mulhu $a, $b)
+// (sra (mul (sext i32:$a to i64), (sext i32:$a to i64)), 32) -> (mulhs $a, $b)
+static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG,
+                                  const TargetLowering &TLI) {
+  assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
+         "SRL or SRA node is required here!");
+
+  // Check the shift amount. Proceed with the transformation if the shift
+  // amount is constant.
+  ConstantSDNode *ShiftAmtSrc = isConstOrConstSplat(N->getOperand(1));
+  if (!ShiftAmtSrc)
+    return SDValue();
+
+  SDLoc DL(N);
+
+  // The operation feeding into the shift must be a multiply.
+  SDValue ShiftOperand = N->getOperand(0);
+  if (ShiftOperand.getOpcode() != ISD::MUL)
+    return SDValue();
+
+  // Both operands must be equivalent extend nodes.
+  SDValue LeftOp = ShiftOperand.getOperand(0);
+  SDValue RightOp = ShiftOperand.getOperand(1);
+  bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND;
+  bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND;
+
+  if ((!(IsSignExt || IsZeroExt)) || LeftOp.getOpcode() != RightOp.getOpcode())
+    return SDValue();
+
+  EVT WideVT1 = LeftOp.getValueType();
+  EVT WideVT2 = RightOp.getValueType();
+  // Proceed with the transformation if the wide types match.
+  assert((WideVT1 == WideVT2) &&
+         "Cannot have a multiply node with two 
diff erent operand types.");
+
+  EVT NarrowVT = LeftOp.getOperand(0).getValueType();
+  // Check that the two extend nodes are the same type.
+  if (NarrowVT !=  RightOp.getOperand(0).getValueType())
+    return SDValue();
+
+  // Only transform into mulh if mulh for the narrow type is cheaper than
+  // a multiply followed by a shift. This should also check if mulh is
+  // legal for NarrowVT on the target.
+  if (!TLI.isMulhCheaperThanMulShift(NarrowVT))
+      return SDValue();
+
+  // Proceed with the transformation if the wide type is twice as large
+  // as the narrow type.
+  unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
+  if (WideVT1.getScalarSizeInBits() != 2 * NarrowVTSize)
+    return SDValue();
+
+  // Check the shift amount with the narrow type size.
+  // Proceed with the transformation if the shift amount is the width
+  // of the narrow type.
+  unsigned ShiftAmt = ShiftAmtSrc->getZExtValue();
+  if (ShiftAmt != NarrowVTSize)
+    return SDValue();
+
+  // If the operation feeding into the MUL is a sign extend (sext),
+  // we use mulhs. Othewise, zero extends (zext) use mulhu.
+  unsigned MulhOpcode = IsSignExt ? ISD::MULHS : ISD::MULHU;
+
+  SDValue Result = DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0),
+                               RightOp.getOperand(0));
+  return (N->getOpcode() == ISD::SRA ? DAG.getSExtOrTrunc(Result, DL, WideVT1)
+                                     : DAG.getZExtOrTrunc(Result, DL, WideVT1));
+}
+
 SDValue DAGCombiner::visitSRA(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
@@ -8098,6 +8169,11 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
     if (SDValue NewSRA = visitShiftByConstant(N))
       return NewSRA;
 
+  // Try to transform this shift into a multiply-high if
+  // it matches the appropriate pattern detected in combineShiftToMULH.
+  if (SDValue MULH = combineShiftToMULH(N, DAG, TLI))
+    return MULH;
+
   return SDValue();
 }
 
@@ -8323,6 +8399,11 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
     }
   }
 
+  // Try to transform this shift into a multiply-high if
+  // it matches the appropriate pattern detected in combineShiftToMULH.
+  if (SDValue MULH = combineShiftToMULH(N, DAG, TLI))
+    return MULH;
+
   return SDValue();
 }
 

diff  --git a/llvm/test/CodeGen/PowerPC/combine-to-mulh-shift-amount.ll b/llvm/test/CodeGen/PowerPC/combine-to-mulh-shift-amount.ll
new file mode 100644
index 000000000000..ba9089709b40
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/combine-to-mulh-shift-amount.ll
@@ -0,0 +1,116 @@
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN:   FileCheck %s
+
+; These tests show that for 32-bit and 64-bit scalars, combining a shift to
+; a single multiply-high is only valid when the shift amount is the same as
+; the width of the narrow type.
+
+; That is, combining a shift to mulh is only valid for 32-bit when the shift
+; amount is 32.
+; Likewise, combining a shift to mulh is only valid for 64-bit when the shift
+; amount is 64.
+
+define i32 @test_mulhw(i32 %a, i32 %b) {
+; CHECK-LABEL: test_mulhw:
+; CHECK:     mulld
+; CHECK-NOT: mulhw
+; CHECK:     blr
+  %1 = sext i32 %a to i64
+  %2 = sext i32 %b to i64
+  %mul = mul i64 %1, %2
+  %shr = lshr i64 %mul, 33
+  %tr = trunc i64 %shr to i32
+  ret i32 %tr
+}
+
+define i32 @test_mulhu(i32 %a, i32 %b) {
+; CHECK-LABEL: test_mulhu:
+; CHECK:     mulld
+; CHECK-NOT: mulhwu
+; CHECK:     blr
+  %1 = zext i32 %a to i64
+  %2 = zext i32 %b to i64
+  %mul = mul i64 %1, %2
+  %shr = lshr i64 %mul, 33
+  %tr = trunc i64 %shr to i32
+  ret i32 %tr
+}
+
+define i64 @test_mulhd(i64 %a, i64 %b) {
+; CHECK-LABEL: test_mulhd:
+; CHECK:    mulhd
+; CHECK:    mulld
+; CHECK:    blr
+  %1 = sext i64 %a to i128
+  %2 = sext i64 %b to i128
+  %mul = mul i128 %1, %2
+  %shr = lshr i128 %mul, 63
+  %tr = trunc i128 %shr to i64
+  ret i64 %tr
+}
+
+define i64 @test_mulhdu(i64 %a, i64 %b) {
+; CHECK-LABEL: test_mulhdu:
+; CHECK:    mulhdu
+; CHECK:    mulld
+; CHECK:    blr
+  %1 = zext i64 %a to i128
+  %2 = zext i64 %b to i128
+  %mul = mul i128 %1, %2
+  %shr = lshr i128 %mul, 63
+  %tr = trunc i128 %shr to i64
+  ret i64 %tr
+}
+
+define signext i32 @test_mulhw_signext(i32 %a, i32 %b) {
+; CHECK-LABEL: test_mulhw_signext:
+; CHECK:     mulld
+; CHECK-NOT: mulhw
+; CHECK:     blr
+  %1 = sext i32 %a to i64
+  %2 = sext i32 %b to i64
+  %mul = mul i64 %1, %2
+  %shr = lshr i64 %mul, 33
+  %tr = trunc i64 %shr to i32
+  ret i32 %tr
+}
+
+define zeroext i32 @test_mulhu_zeroext(i32 %a, i32 %b) {
+; CHECK-LABEL: test_mulhu_zeroext:
+; CHECK:     mulld
+; CHECK-NOT: mulhwu
+; CHECK:     blr
+  %1 = zext i32 %a to i64
+  %2 = zext i32 %b to i64
+  %mul = mul i64 %1, %2
+  %shr = lshr i64 %mul, 33
+  %tr = trunc i64 %shr to i32
+  ret i32 %tr
+}
+
+define signext i64 @test_mulhd_signext(i64 %a, i64 %b) {
+; CHECK-LABEL: test_mulhd_signext:
+; CHECK:    mulhd
+; CHECK:    mulld
+; CHECK:    blr
+  %1 = sext i64 %a to i128
+  %2 = sext i64 %b to i128
+  %mul = mul i128 %1, %2
+  %shr = lshr i128 %mul, 63
+  %tr = trunc i128 %shr to i64
+  ret i64 %tr
+}
+
+define zeroext i64 @test_mulhdu_zeroext(i64 %a, i64 %b) {
+; CHECK-LABEL: test_mulhdu_zeroext:
+; CHECK:    mulhdu
+; CHECK:    mulld
+; CHECK:    blr
+  %1 = zext i64 %a to i128
+  %2 = zext i64 %b to i128
+  %mul = mul i128 %1, %2
+  %shr = lshr i128 %mul, 63
+  %tr = trunc i128 %shr to i64
+  ret i64 %tr
+}

diff  --git a/llvm/test/CodeGen/PowerPC/mul-high.ll b/llvm/test/CodeGen/PowerPC/mul-high.ll
new file mode 100644
index 000000000000..b6808f9ac605
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/mul-high.ll
@@ -0,0 +1,125 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN:   FileCheck %s
+
+; This test case tests multiply high for i32 and i64. When the values are
+; sign-extended, mulh[d|w] is emitted. When values are zero-extended,
+; mulh[d|w]u is emitted instead.
+
+; The primary goal is transforming the pattern:
+; (shift (mul (ext $a, <wide_type>), (ext $b, <wide_type>)), <narrow_type>)
+; into (mulhs $a, $b) for sign extend, and (mulhu $a, $b) for zero extend,
+; provided that the mulh operation is legal for <narrow_type>.
+; The shift operation can be either the srl or sra operations.
+
+; When no attribute is present on i32, the shift operation is srl.
+define i32 @test_mulhw(i32 %a, i32 %b) {
+; CHECK-LABEL: test_mulhw:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mulhw r3, r3, r4
+; CHECK-NEXT:    clrldi r3, r3, 32
+; CHECK-NEXT:    blr
+  %1 = sext i32 %a to i64
+  %2 = sext i32 %b to i64
+  %mul = mul i64 %1, %2
+  %shr = lshr i64 %mul, 32
+  %tr = trunc i64 %shr to i32
+  ret i32 %tr
+}
+
+define i32 @test_mulhu(i32 %a, i32 %b) {
+; CHECK-LABEL: test_mulhu:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mulhwu r3, r3, r4
+; CHECK-NEXT:    clrldi r3, r3, 32
+; CHECK-NEXT:    blr
+  %1 = zext i32 %a to i64
+  %2 = zext i32 %b to i64
+  %mul = mul i64 %1, %2
+  %shr = lshr i64 %mul, 32
+  %tr = trunc i64 %shr to i32
+  ret i32 %tr
+}
+
+define i64 @test_mulhd(i64 %a, i64 %b) {
+; CHECK-LABEL: test_mulhd:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mulhd r3, r3, r4
+; CHECK-NEXT:    blr
+  %1 = sext i64 %a to i128
+  %2 = sext i64 %b to i128
+  %mul = mul i128 %1, %2
+  %shr = lshr i128 %mul, 64
+  %tr = trunc i128 %shr to i64
+  ret i64 %tr
+}
+
+define i64 @test_mulhdu(i64 %a, i64 %b) {
+; CHECK-LABEL: test_mulhdu:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mulhdu r3, r3, r4
+; CHECK-NEXT:    blr
+  %1 = zext i64 %a to i128
+  %2 = zext i64 %b to i128
+  %mul = mul i128 %1, %2
+  %shr = lshr i128 %mul, 64
+  %tr = trunc i128 %shr to i64
+  ret i64 %tr
+}
+
+; When the signext attribute is present on i32, the shift operation is sra.
+; We are actually transforming (sra (mul sext_in_reg, sext_in_reg)) into mulh.
+define signext i32 @test_mulhw_signext(i32 %a, i32 %b) {
+; CHECK-LABEL: test_mulhw_signext:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mulhw r3, r3, r4
+; CHECK-NEXT:    extsw r3, r3
+; CHECK-NEXT:    blr
+  %1 = sext i32 %a to i64
+  %2 = sext i32 %b to i64
+  %mul = mul i64 %1, %2
+  %shr = lshr i64 %mul, 32
+  %tr = trunc i64 %shr to i32
+  ret i32 %tr
+}
+
+define zeroext i32 @test_mulhu_zeroext(i32 %a, i32 %b) {
+; CHECK-LABEL: test_mulhu_zeroext:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mulhwu r3, r3, r4
+; CHECK-NEXT:    clrldi r3, r3, 32
+; CHECK-NEXT:    blr
+  %1 = zext i32 %a to i64
+  %2 = zext i32 %b to i64
+  %mul = mul i64 %1, %2
+  %shr = lshr i64 %mul, 32
+  %tr = trunc i64 %shr to i32
+  ret i32 %tr
+}
+
+define signext i64 @test_mulhd_signext(i64 %a, i64 %b) {
+; CHECK-LABEL: test_mulhd_signext:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mulhd r3, r3, r4
+; CHECK-NEXT:    blr
+  %1 = sext i64 %a to i128
+  %2 = sext i64 %b to i128
+  %mul = mul i128 %1, %2
+  %shr = lshr i128 %mul, 64
+  %tr = trunc i128 %shr to i64
+  ret i64 %tr
+}
+
+define zeroext i64 @test_mulhdu_zeroext(i64 %a, i64 %b) {
+; CHECK-LABEL: test_mulhdu_zeroext:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mulhdu r3, r3, r4
+; CHECK-NEXT:    blr
+  %1 = zext i64 %a to i128
+  %2 = zext i64 %b to i128
+  %mul = mul i128 %1, %2
+  %shr = lshr i128 %mul, 64
+  %tr = trunc i128 %shr to i64
+  ret i64 %tr
+}