[llvm] 1ef7bf4 - [PowerPC] Improve the way legalize mul for v8i16 and add pattern to match mul + add

Wed Mar 25 21:47:00 PDT 2020

Author: QingShan Zhang
Date: 2020-03-26T04:46:49Z
New Revision: 1ef7bf412141811fa80473e0f13e9dc76972b1a0

URL: https://github.com/llvm/llvm-project/commit/1ef7bf412141811fa80473e0f13e9dc76972b1a0
DIFF: https://github.com/llvm/llvm-project/commit/1ef7bf412141811fa80473e0f13e9dc76972b1a0.diff

LOG: [PowerPC] Improve the way legalize mul for v8i16 and add pattern to match mul + add

We can legalize the MUL operation for v8i16 with the instruction (vmladduhm A, B, 0)
if Altivec is enabled. Currently, it is marked as Custom and expanded later, which is not
the right way. With MUL legal, we can also add a pattern to match mul + add with (vmladduhm A, B, C).

Reviewed By: Nemanjai

Differential Revision: https://reviews.llvm.org/D76751

Added: 
    

Modified: 
    llvm/lib/Target/PowerPC/PPCISelLowering.cpp
    llvm/lib/Target/PowerPC/PPCInstrAltivec.td
    llvm/test/CodeGen/PowerPC/vmladduhm.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index ec102c35ec05..f2db901a1a84 100644

--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -764,7 +764,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
     else
       setOperationAction(ISD::MUL, MVT::v4i32, Custom);
 
-    setOperationAction(ISD::MUL, MVT::v8i16, Custom);
+    setOperationAction(ISD::MUL, MVT::v8i16, Legal);
     setOperationAction(ISD::MUL, MVT::v16i8, Custom);
 
     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
@@ -10454,13 +10454,6 @@ SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
     HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
                               Neg16, DAG, dl);
     return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
-  } else if (Op.getValueType() == MVT::v8i16) {
-    SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
-
-    SDValue Zero = BuildSplatI(0, 1, MVT::v8i16, DAG, dl);
-
-    return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm,
-                            LHS, RHS, Zero, DAG, dl);
   } else if (Op.getValueType() == MVT::v16i8) {
     SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
     bool isLittleEndian = Subtarget.isLittleEndian();

diff  --git a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
index ee4ea4487aab..1dd8142f46eb 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -869,6 +869,12 @@ def : Pat<(v8i16 (rotl v8i16:$vA, v8i16:$vB)),
 def : Pat<(v4i32 (rotl v4i32:$vA, v4i32:$vB)),
           (v4i32 (VRLW v4i32:$vA, v4i32:$vB))>;
 
+// Multiply
+def : Pat<(mul v8i16:$vA, v8i16:$vB), (VMLADDUHM $vA, $vB, (v8i16(V_SET0H)))>;
+
+// Add
+def : Pat<(add (mul v8i16:$vA, v8i16:$vB), v8i16:$vC), (VMLADDUHM $vA, $vB, $vC)>;
+
 // Saturating adds/subtracts.
 def : Pat<(v16i8 (saddsat v16i8:$vA, v16i8:$vB)), (v16i8 (VADDSBS $vA, $vB))>;
 def : Pat<(v16i8 (uaddsat v16i8:$vA, v16i8:$vB)), (v16i8 (VADDUBS $vA, $vB))>;

diff  --git a/llvm/test/CodeGen/PowerPC/vmladduhm.ll b/llvm/test/CodeGen/PowerPC/vmladduhm.ll
index f2475d9e2490..3fae99829abc 100644
--- a/llvm/test/CodeGen/PowerPC/vmladduhm.ll
+++ b/llvm/test/CodeGen/PowerPC/vmladduhm.ll
@@ -4,7 +4,7 @@
 define <8 x i16> @mul(<8 x i16> %m, <8 x i16> %n) {
 ; CHECK-LABEL: mul:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xxlxor 36, 36, 36
+; CHECK-NEXT:    vxor 4, 4, 4
 ; CHECK-NEXT:    vmladduhm 2, 2, 3, 4
 ; CHECK-NEXT:    blr
 entry:
@@ -15,9 +15,7 @@ entry:
 define <8 x i16> @madd(<8 x i16> %m, <8 x i16> %n, <8 x i16> %o) {
 ; CHECK-LABEL: madd:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xxlxor 37, 37, 37
-; CHECK-NEXT:    vmladduhm 2, 2, 3, 5
-; CHECK-NEXT:    vadduhm 2, 2, 4
+; CHECK-NEXT:    vmladduhm 2, 2, 3, 4
 ; CHECK-NEXT:    blr
 entry:
   %0 = mul <8 x i16> %m, %n