[llvm] r265793 - [ARM] Enable SMLAW[B|T] and SMLUW[B|T] instruction selection

Fri Apr 8 09:02:53 PDT 2016

Author: sam_parker
Date: Fri Apr  8 11:02:53 2016
New Revision: 265793

URL: http://llvm.org/viewvc/llvm-project?rev=265793&view=rev
Log:
[ARM] Enable SMLAW[B|T] and SMLUW[B|T] instruction selection

Added ISelDAGToDAG functions to enable selection of the smlawb, smlawt,
smulwb and smulwt instructions for the ARM backend. Also updated the smul
CodeGen test and removed the smulw one.

Differential Revision: http://reviews.llvm.org/D18892


Removed:
    llvm/trunk/test/CodeGen/ARM/smulw.ll
Modified:
    llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp
    llvm/trunk/test/CodeGen/ARM/smul.ll

Modified: llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp?rev=265793&r1=265792&r2=265793&view=diff
==============================================================================

--- llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp Fri Apr  8 11:02:53 2016
@@ -252,6 +252,8 @@ private:
 
   SDNode *SelectConcatVector(SDNode *N);
 
+  SDNode *SelectSMLAWSMULW(SDNode *N);
+
   /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
   /// inline asm expressions.
   bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
@@ -2467,6 +2469,135 @@ SDNode *ARMDAGToDAGISel::SelectABSOp(SDN
   return nullptr;
 }
 
+static bool SearchSignedMulShort(SDValue SignExt, unsigned *Opc, SDValue &Src1,
+                                 bool Accumulate) {
+  // For SM*WB, we need to some form of sext.
+  // For SM*WT, we need to search for (sra X, 16)
+  // Src1 then gets set to X.
+  if ((SignExt.getOpcode() == ISD::SIGN_EXTEND ||
+       SignExt.getOpcode() == ISD::SIGN_EXTEND_INREG ||
+       SignExt.getOpcode() == ISD::AssertSext) &&
+       SignExt.getValueType() == MVT::i32) {
+
+    *Opc = Accumulate ? ARM::SMLAWB : ARM::SMULWB;
+    Src1 = SignExt.getOperand(0);
+    return true;
+  }
+
+  if (SignExt.getOpcode() != ISD::SRA)
+    return false;
+
+  ConstantSDNode *SRASrc1 = dyn_cast<ConstantSDNode>(SignExt.getOperand(1));
+  if (!SRASrc1 || SRASrc1->getZExtValue() != 16)
+    return false;
+
+  SDValue Op0 = SignExt.getOperand(0);
+
+  // The sign extend operand for SM*WB could be generated by a shl and ashr.
+  if (Op0.getOpcode() == ISD::SHL) {
+    SDValue SHL = Op0;
+    ConstantSDNode *SHLSrc1 = dyn_cast<ConstantSDNode>(SHL.getOperand(1));
+    if (!SHLSrc1 || SHLSrc1->getZExtValue() != 16)
+      return false;
+
+    *Opc = Accumulate ? ARM::SMLAWB : ARM::SMULWB;
+    Src1 = Op0.getOperand(0);
+    return true;
+  }
+  *Opc = Accumulate ? ARM::SMLAWT : ARM::SMULWT;
+  Src1 = SignExt.getOperand(0);
+  return true;
+}
+
+static bool SearchSignedMulLong(SDValue OR, unsigned *Opc, SDValue &Src0,
+                                SDValue &Src1, bool Accumulate) {
+  // First we look for:
+  // (add (or (srl ?, 16), (shl ?, 16)))
+  if (OR.getOpcode() != ISD::OR)
+    return false;
+
+  SDValue SRL = OR.getOperand(0);
+  SDValue SHL = OR.getOperand(1);
+
+  if (SRL.getOpcode() != ISD::SRL || SHL.getOpcode() != ISD::SHL) {
+    SRL = OR.getOperand(1);
+    SHL = OR.getOperand(0);
+    if (SRL.getOpcode() != ISD::SRL || SHL.getOpcode() != ISD::SHL)
+      return false;
+  }
+
+  ConstantSDNode *SRLSrc1 = dyn_cast<ConstantSDNode>(SRL.getOperand(1));
+  ConstantSDNode *SHLSrc1 = dyn_cast<ConstantSDNode>(SHL.getOperand(1));
+  if (!SRLSrc1 || !SHLSrc1 || SRLSrc1->getZExtValue() != 16 ||
+      SHLSrc1->getZExtValue() != 16)
+    return false;
+
+  // The first operands to the shifts need to be the two results from the
+  // same smul_lohi node.
+  if ((SRL.getOperand(0).getNode() != SHL.getOperand(0).getNode()) ||
+       SRL.getOperand(0).getOpcode() != ISD::SMUL_LOHI)
+    return false;
+
+  SDNode *SMULLOHI = SRL.getOperand(0).getNode();
+  if (SRL.getOperand(0) != SDValue(SMULLOHI, 0) ||
+      SHL.getOperand(0) != SDValue(SMULLOHI, 1))
+    return false;
+
+  // Now we have:
+  // (add (or (srl (smul_lohi ?, ?), 16), (shl (smul_lohi ?, ?), 16)))
+  // For SMLAW[B|T] smul_lohi will take a 32-bit and a 16-bit arguments.
+  // For SMLAWB the 16-bit value will signed extended somehow.
+  // For SMLAWT only the SRA is required.
+
+  // Check both sides of SMUL_LOHI
+  if (SearchSignedMulShort(SMULLOHI->getOperand(0), Opc, Src1, Accumulate)) {
+    Src0 = SMULLOHI->getOperand(1);
+  } else if (SearchSignedMulShort(SMULLOHI->getOperand(1), Opc, Src1,
+                                  Accumulate)) {
+    Src0 = SMULLOHI->getOperand(0);
+  } else {
+    return false;
+  }
+  return true;
+}
+
+SDNode *ARMDAGToDAGISel::SelectSMLAWSMULW(SDNode *N) {
+  SDLoc dl(N);
+  SDValue Src0 = N->getOperand(0);
+  SDValue Src1 = N->getOperand(1);
+  SDValue A, B;
+  unsigned Opc = 0;
+
+  if (N->getOpcode() == ISD::ADD) {
+    if (Src0.getOpcode() != ISD::OR && Src1.getOpcode() != ISD::OR)
+      return nullptr;
+
+    SDValue Acc;
+    if (SearchSignedMulLong(Src0, &Opc, A, B, true)) {
+      Acc = Src1;
+    } else if (SearchSignedMulLong(Src1, &Opc, A, B, true)) {
+      Acc = Src0;
+    } else {
+      return nullptr;
+    }
+    if (Opc == 0)
+      return nullptr;
+
+    SDValue Ops[] = { A, B, Acc, getAL(CurDAG, dl),
+                      CurDAG->getRegister(0, MVT::i32) };
+    return CurDAG->SelectNodeTo(N, Opc, MVT::i32, MVT::Other, Ops);
+  } else if (N->getOpcode() == ISD::OR &&
+             SearchSignedMulLong(SDValue(N, 0), &Opc, A, B, false)) {
+    if (Opc == 0)
+      return nullptr;
+
+    SDValue Ops[] = { A, B, getAL(CurDAG, dl),
+                      CurDAG->getRegister(0, MVT::i32)};
+    return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
+  }
+  return nullptr;
+}
+
 SDNode *ARMDAGToDAGISel::SelectConcatVector(SDNode *N) {
   // The only time a CONCAT_VECTORS operation can have legal types is when
   // two 64-bit vectors are concatenated to a 128-bit vector.
@@ -2486,6 +2617,13 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *
 
   switch (N->getOpcode()) {
   default: break;
+  case ISD::ADD:
+  case ISD::OR: {
+    SDNode *ResNode = SelectSMLAWSMULW(N);
+    if (ResNode)
+      return ResNode;
+    break;
+  }
   case ISD::WRITE_REGISTER: {
     SDNode *ResNode = SelectWriteRegister(N);
     if (ResNode)

Modified: llvm/trunk/test/CodeGen/ARM/smul.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/smul.ll?rev=265793&r1=265792&r2=265793&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/smul.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/smul.ll Fri Apr  8 11:02:53 2016
@@ -1,5 +1,6 @@
 ; RUN: llc -mtriple=arm-eabi -mcpu=generic %s -o /dev/null
 ; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 %s -o - | FileCheck %s
+; RUN: llc -mtriple=thumb--none-eabi -mcpu=cortex-a8 %s -o - | FileCheck %s
 
 @x = weak global i16 0          ; <i16*> [#uses=1]
 @y = weak global i16 0          ; <i16*> [#uses=0]
@@ -34,3 +35,107 @@ define i32 @f3(i32 %a, i16 %x, i32 %y) {
         ret i32 %tmp5
 }
 
+define i32 @f4(i32 %a, i32 %x, i32 %y) {
+; CHECK-LABEL: f4
+; CHECK: smlatt
+        %tmp1 = ashr i32 %x, 16
+        %tmp3 = ashr i32 %y, 16
+        %tmp4 = mul i32 %tmp3, %tmp1
+        %tmp5 = add i32 %tmp4, %a
+        ret i32 %tmp5
+}
+
+define i32 @f5(i32 %a, i16 %x, i16 %y) {
+; CHECK-LABEL: f5
+; CHECK: smlabb
+        %tmp1 = sext i16 %x to i32
+        %tmp3 = sext i16 %y to i32
+        %tmp4 = mul i32 %tmp3, %tmp1
+        %tmp5 = add i32 %tmp4, %a
+        ret i32 %tmp5
+}
+
+define i32 @f6(i32 %a, i16 %x, i32 %y) {
+; CHECK-LABEL: f6
+; CHECK: smlabt
+        %tmp1 = sext i16 %x to i32
+        %tmp3 = ashr i32 %y, 16
+        %tmp4 = mul i32 %tmp3, %tmp1
+        %tmp5 = add i32 %tmp4, %a
+        ret i32 %tmp5
+}
+
+define i32 @f7(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: f7
+; CHECK: smlawb
+        %shl = shl i32 %b, 16
+        %shr = ashr exact i32 %shl, 16
+        %conv = sext i32 %a to i64
+        %conv2 = sext i32 %shr to i64
+        %mul = mul nsw i64 %conv2, %conv
+        %shr49 = lshr i64 %mul, 16
+        %conv5 = trunc i64 %shr49 to i32
+        %add = add nsw i32 %conv5, %c
+        ret i32 %add
+}
+
+define i32 @f8(i32 %a, i16 signext %b, i32 %c) {
+; CHECK-LABEL: f8
+; CHECK: smlawb
+        %conv = sext i32 %a to i64
+        %conv1 = sext i16 %b to i64
+        %mul = mul nsw i64 %conv1, %conv
+        %shr5 = lshr i64 %mul, 16
+        %conv2 = trunc i64 %shr5 to i32
+        %add = add nsw i32 %conv2, %c
+        ret i32 %add
+}
+
+define i32 @f9(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: f9
+; CHECK: smlawt
+        %conv = sext i32 %a to i64
+        %shr = ashr i32 %b, 16
+        %conv1 = sext i32 %shr to i64
+        %mul = mul nsw i64 %conv1, %conv
+        %shr26 = lshr i64 %mul, 16
+        %conv3 = trunc i64 %shr26 to i32
+        %add = add nsw i32 %conv3, %c
+        ret i32 %add
+}
+
+define i32 @f10(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: f10
+; CHECK: smulwb
+        %shl = shl i32 %b, 16
+        %shr = ashr exact i32 %shl, 16
+        %conv = sext i32 %a to i64
+        %conv2 = sext i32 %shr to i64
+        %mul = mul nsw i64 %conv2, %conv
+        %shr37 = lshr i64 %mul, 16
+        %conv4 = trunc i64 %shr37 to i32
+        ret i32 %conv4
+}
+
+define i32 @f11(i32 %a, i16 signext %b, i32 %c) {
+; CHECK-LABEL: f11
+; CHECK: smulwb
+        %conv = sext i32 %a to i64
+        %conv1 = sext i16 %b to i64
+        %mul = mul nsw i64 %conv1, %conv
+        %shr4 = lshr i64 %mul, 16
+        %conv2 = trunc i64 %shr4 to i32
+        ret i32 %conv2
+}
+
+define i32 @f12(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: f12
+; CHECK: smulwt
+        %conv = sext i32 %a to i64
+        %shr = ashr i32 %b, 16
+        %conv1 = sext i32 %shr to i64
+        %mul = mul nsw i64 %conv1, %conv
+        %shr25 = lshr i64 %mul, 16
+        %conv3 = trunc i64 %shr25 to i32
+        ret i32 %conv3
+}

Removed: llvm/trunk/test/CodeGen/ARM/smulw.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/smulw.ll?rev=265792&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/smulw.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/smulw.ll (removed)
@@ -1,26 +0,0 @@
-; RUN: llc -mtriple=arm--none-eabi -mcpu=cortex-a8 %s -o - | FileCheck %s
-; RUN: llc -mtriple=thumb--none-eabi -mcpu=cortex-a8 %s -o - | FileCheck %s
-
-; We cannot codegen the smulw[bt] or smlaw[bt] instructions for these functions,
-; as the top 16 bits of the result would differ
-
-define i32 @f1(i32 %a, i16 %b) {
-; CHECK-LABEL: f1:
-; CHECK: mul
-; CHECK: asr
-  %tmp1 = sext i16 %b to i32
-  %tmp2 = mul i32 %a, %tmp1
-  %tmp3 = ashr i32 %tmp2, 16
-  ret i32 %tmp3
-}
-
-define i32 @f2(i32 %a, i16 %b, i32 %c) {
-; CHECK-LABEL: f2:
-; CHECK: mul
-; CHECK: add{{.*}}, asr #16
-  %tmp1 = sext i16 %b to i32
-  %tmp2 = mul i32 %a, %tmp1
-  %tmp3 = ashr i32 %tmp2, 16
-  %tmp4 = add i32 %tmp3, %c
-  ret i32 %tmp4
-}