[llvm] r367601 - [X86] In decomposeMulByConstant, legalize the VT before querying whether the multiply is legal

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Thu Aug 1 11:49:08 PDT 2019


Author: ctopper
Date: Thu Aug  1 11:49:07 2019
New Revision: 367601

URL: http://llvm.org/viewvc/llvm-project?rev=367601&view=rev
Log:
[X86] In decomposeMulByConstant, legalize the VT before querying whether the multiply is legal

If a type is larger than a legal type and needs to be split, we previously allowed the multiply to be decomposed even if the split multiply would be legal. Since the shift + add/sub sequence would also need to be split, it's not any better to decompose it.

This patch figures out what type the mul will eventually be legalized to and then uses that type for the query. I tried just returning false for illegal types and letting them get handled after type legalization, but then we can't recognize an i64 constant splat on 32-bit targets, since it will be destroyed by type legalization. We could special-case vectors of i64 to avoid that...
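
As an illustration (a minimal sketch, not part of the committed code), here is how the legalize-then-query pattern resolves a wider-than-legal type; TLI and Ctx are assumed stand-ins for the target lowering object and the DAG's LLVMContext:

    // On an SSE4.1 target, v8i32 is not a legal type and has the
    // action TypeSplitVector; getTypeToTransformTo then yields v4i32.
    EVT VT = MVT::v8i32;
    while (TLI.getTypeAction(Ctx, VT) != TargetLowering::TypeLegal)
      VT = TLI.getTypeToTransformTo(Ctx, VT);
    // v4i32 MUL is legal with SSE4.1 (pmulld), so the shl+add/sub
    // decomposition is skipped and the wide multiply stays a multiply.
    bool MulIsLegal = TLI.isOperationLegal(ISD::MUL, VT);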

Differential Revision: https://reviews.llvm.org/D65533

Modified:
    llvm/trunk/include/llvm/CodeGen/TargetLowering.h
    llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/lib/Target/X86/X86ISelLowering.h
    llvm/trunk/test/CodeGen/X86/vector-mul.ll

Modified: llvm/trunk/include/llvm/CodeGen/TargetLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/TargetLowering.h?rev=367601&r1=367600&r2=367601&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/TargetLowering.h (original)
+++ llvm/trunk/include/llvm/CodeGen/TargetLowering.h Thu Aug  1 11:49:07 2019
@@ -1904,7 +1904,8 @@ public:
   /// This may be true if the target does not directly support the
   /// multiplication operation for the specified type or the sequence of simpler
   /// ops is faster than the multiply.
-  virtual bool decomposeMulByConstant(EVT VT, SDValue C) const {
+  virtual bool decomposeMulByConstant(LLVMContext &Context,
+                                      EVT VT, SDValue C) const {
     return false;
   }
 

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=367601&r1=367600&r2=367601&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Thu Aug  1 11:49:07 2019
@@ -3556,7 +3556,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N)
   //           x * 15 --> (x << 4) - x
   //           x * -33 --> -((x << 5) + x)
   //           x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
-  if (N1IsConst && TLI.decomposeMulByConstant(VT, N1)) {
+  if (N1IsConst && TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
     // TODO: We could handle more general decomposition of any constant by
     //       having the target set a limit on number of ops and making a
     //       callback to determine that sequence (similar to sqrt expansion).
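
The identities behind the patterns above can be sanity-checked with plain unsigned arithmetic (illustrative only, not part of the patch):

    #include <cassert>
    #include <cstdint>

    int main() {
      // Unsigned math wraps mod 2^32, matching the DAG's
      // two's-complement handling of the negative constants.
      uint32_t x = 12345;
      assert(x * 17u == (x << 4) + x);              // x * 17  --> (x << 4) + x
      assert(x * 15u == (x << 4) - x);              // x * 15  --> (x << 4) - x
      assert(x * uint32_t(-33) == -((x << 5) + x)); // x * -33 --> -((x << 5) + x)
      assert(x * uint32_t(-15) == x - (x << 4));    // x * -15 --> x - (x << 4)
      return 0;
    }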

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=367601&r1=367600&r2=367601&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Thu Aug  1 11:49:07 2019
@@ -4869,15 +4869,25 @@ bool X86TargetLowering::convertSelectOfC
   return true;
 }
 
-bool X86TargetLowering::decomposeMulByConstant(EVT VT, SDValue C) const {
+bool X86TargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
+                                               SDValue C) const {
   // TODO: We handle scalars using custom code, but generic combining could make
   // that unnecessary.
   APInt MulC;
   if (!ISD::isConstantSplatVector(C.getNode(), MulC))
     return false;
 
+  // Find the type this will be legalized to. Otherwise we might prematurely
+  // convert this to shl+add/sub and then still have to type legalize those ops.
+  // Another choice would be to defer the decision for illegal types until 
+  // after type legalization. But constant splat vectors of i64 can't make it
+  // through type legalization on 32-bit targets so we would need to special
+  // case vXi64.
+  while (getTypeAction(Context, VT) != TypeLegal)
+    VT = getTypeToTransformTo(Context, VT);
+
   // If vector multiply is legal, assume that's faster than shl + add/sub.
-  // TODO: Multiply is a complex op with higher latency and lower througput in
+  // TODO: Multiply is a complex op with higher latency and lower throughput in
   //       most implementations, so this check could be loosened based on type
   //       and/or a CPU attribute.
   if (isOperationLegal(ISD::MUL, VT))
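
As the hunk above shows, the check deliberately keeps the multiply whenever the legalized type supports ISD::MUL natively; the vector-mul.ll diff below shows the payoff, where each split <4 x i32> or <8 x i16> half becomes a single pmulld/pmullw against a splat constant instead of the shift, add/sub, and extra register moves the decomposed form needed.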

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=367601&r1=367600&r2=367601&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Thu Aug  1 11:49:07 2019
@@ -1105,7 +1105,8 @@ namespace llvm {
 
     bool convertSelectOfConstantsToMath(EVT VT) const override;
 
-    bool decomposeMulByConstant(EVT VT, SDValue C) const override;
+    bool decomposeMulByConstant(LLVMContext &Context, EVT VT,
+                                SDValue C) const override;
 
     bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT,
                                   bool IsSigned) const override;

Modified: llvm/trunk/test/CodeGen/X86/vector-mul.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-mul.ll?rev=367601&r1=367600&r2=367601&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-mul.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-mul.ll Thu Aug  1 11:49:07 2019
@@ -435,26 +435,16 @@ define <4 x i64> @mul_v4i64_17(<4 x i64>
 define <8 x i32> @mul_v8i32_17(<8 x i32> %a0) nounwind {
 ; X86-LABEL: mul_v8i32_17:
 ; X86:       # %bb.0:
-; X86-NEXT:    movdqa %xmm0, %xmm2
-; X86-NEXT:    pslld $4, %xmm2
-; X86-NEXT:    paddd %xmm0, %xmm2
-; X86-NEXT:    movdqa %xmm1, %xmm3
-; X86-NEXT:    pslld $4, %xmm3
-; X86-NEXT:    paddd %xmm1, %xmm3
-; X86-NEXT:    movdqa %xmm2, %xmm0
-; X86-NEXT:    movdqa %xmm3, %xmm1
+; X86-NEXT:    movdqa {{.*#+}} xmm2 = [17,17,17,17]
+; X86-NEXT:    pmulld %xmm2, %xmm0
+; X86-NEXT:    pmulld %xmm2, %xmm1
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: mul_v8i32_17:
 ; X64:       # %bb.0:
-; X64-NEXT:    movdqa %xmm0, %xmm2
-; X64-NEXT:    pslld $4, %xmm2
-; X64-NEXT:    paddd %xmm0, %xmm2
-; X64-NEXT:    movdqa %xmm1, %xmm3
-; X64-NEXT:    pslld $4, %xmm3
-; X64-NEXT:    paddd %xmm1, %xmm3
-; X64-NEXT:    movdqa %xmm2, %xmm0
-; X64-NEXT:    movdqa %xmm3, %xmm1
+; X64-NEXT:    movdqa {{.*#+}} xmm2 = [17,17,17,17]
+; X64-NEXT:    pmulld %xmm2, %xmm0
+; X64-NEXT:    pmulld %xmm2, %xmm1
 ; X64-NEXT:    retq
 ;
 ; X64-XOP-LABEL: mul_v8i32_17:
@@ -484,26 +474,16 @@ define <8 x i32> @mul_v8i32_17(<8 x i32>
 define <16 x i16> @mul_v16i16_17(<16 x i16> %a0) nounwind {
 ; X86-LABEL: mul_v16i16_17:
 ; X86:       # %bb.0:
-; X86-NEXT:    movdqa %xmm0, %xmm2
-; X86-NEXT:    psllw $4, %xmm2
-; X86-NEXT:    paddw %xmm0, %xmm2
-; X86-NEXT:    movdqa %xmm1, %xmm3
-; X86-NEXT:    psllw $4, %xmm3
-; X86-NEXT:    paddw %xmm1, %xmm3
-; X86-NEXT:    movdqa %xmm2, %xmm0
-; X86-NEXT:    movdqa %xmm3, %xmm1
+; X86-NEXT:    movdqa {{.*#+}} xmm2 = [17,17,17,17,17,17,17,17]
+; X86-NEXT:    pmullw %xmm2, %xmm0
+; X86-NEXT:    pmullw %xmm2, %xmm1
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: mul_v16i16_17:
 ; X64:       # %bb.0:
-; X64-NEXT:    movdqa %xmm0, %xmm2
-; X64-NEXT:    psllw $4, %xmm2
-; X64-NEXT:    paddw %xmm0, %xmm2
-; X64-NEXT:    movdqa %xmm1, %xmm3
-; X64-NEXT:    psllw $4, %xmm3
-; X64-NEXT:    paddw %xmm1, %xmm3
-; X64-NEXT:    movdqa %xmm2, %xmm0
-; X64-NEXT:    movdqa %xmm3, %xmm1
+; X64-NEXT:    movdqa {{.*#+}} xmm2 = [17,17,17,17,17,17,17,17]
+; X64-NEXT:    pmullw %xmm2, %xmm0
+; X64-NEXT:    pmullw %xmm2, %xmm1
 ; X64-NEXT:    retq
 ;
 ; X64-XOP-LABEL: mul_v16i16_17:
@@ -797,32 +777,16 @@ define <4 x i64> @mul_v4i64_neg1025(<4 x
 define <8 x i32> @mul_v8i32_neg33(<8 x i32> %a0) nounwind {
 ; X86-LABEL: mul_v8i32_neg33:
 ; X86:       # %bb.0:
-; X86-NEXT:    movdqa %xmm0, %xmm3
-; X86-NEXT:    pslld $5, %xmm3
-; X86-NEXT:    paddd %xmm0, %xmm3
-; X86-NEXT:    pxor %xmm2, %xmm2
-; X86-NEXT:    pxor %xmm0, %xmm0
-; X86-NEXT:    psubd %xmm3, %xmm0
-; X86-NEXT:    movdqa %xmm1, %xmm3
-; X86-NEXT:    pslld $5, %xmm3
-; X86-NEXT:    paddd %xmm1, %xmm3
-; X86-NEXT:    psubd %xmm3, %xmm2
-; X86-NEXT:    movdqa %xmm2, %xmm1
+; X86-NEXT:    movdqa {{.*#+}} xmm2 = [4294967263,4294967263,4294967263,4294967263]
+; X86-NEXT:    pmulld %xmm2, %xmm0
+; X86-NEXT:    pmulld %xmm2, %xmm1
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: mul_v8i32_neg33:
 ; X64:       # %bb.0:
-; X64-NEXT:    movdqa %xmm0, %xmm3
-; X64-NEXT:    pslld $5, %xmm3
-; X64-NEXT:    paddd %xmm0, %xmm3
-; X64-NEXT:    pxor %xmm2, %xmm2
-; X64-NEXT:    pxor %xmm0, %xmm0
-; X64-NEXT:    psubd %xmm3, %xmm0
-; X64-NEXT:    movdqa %xmm1, %xmm3
-; X64-NEXT:    pslld $5, %xmm3
-; X64-NEXT:    paddd %xmm1, %xmm3
-; X64-NEXT:    psubd %xmm3, %xmm2
-; X64-NEXT:    movdqa %xmm2, %xmm1
+; X64-NEXT:    movdqa {{.*#+}} xmm2 = [4294967263,4294967263,4294967263,4294967263]
+; X64-NEXT:    pmulld %xmm2, %xmm0
+; X64-NEXT:    pmulld %xmm2, %xmm1
 ; X64-NEXT:    retq
 ;
 ; X64-XOP-LABEL: mul_v8i32_neg33:
@@ -855,32 +819,16 @@ define <8 x i32> @mul_v8i32_neg33(<8 x i
 define <16 x i16> @mul_v16i16_neg9(<16 x i16> %a0) nounwind {
 ; X86-LABEL: mul_v16i16_neg9:
 ; X86:       # %bb.0:
-; X86-NEXT:    movdqa %xmm0, %xmm3
-; X86-NEXT:    psllw $3, %xmm3
-; X86-NEXT:    paddw %xmm0, %xmm3
-; X86-NEXT:    pxor %xmm2, %xmm2
-; X86-NEXT:    pxor %xmm0, %xmm0
-; X86-NEXT:    psubw %xmm3, %xmm0
-; X86-NEXT:    movdqa %xmm1, %xmm3
-; X86-NEXT:    psllw $3, %xmm3
-; X86-NEXT:    paddw %xmm1, %xmm3
-; X86-NEXT:    psubw %xmm3, %xmm2
-; X86-NEXT:    movdqa %xmm2, %xmm1
+; X86-NEXT:    movdqa {{.*#+}} xmm2 = [65527,65527,65527,65527,65527,65527,65527,65527]
+; X86-NEXT:    pmullw %xmm2, %xmm0
+; X86-NEXT:    pmullw %xmm2, %xmm1
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: mul_v16i16_neg9:
 ; X64:       # %bb.0:
-; X64-NEXT:    movdqa %xmm0, %xmm3
-; X64-NEXT:    psllw $3, %xmm3
-; X64-NEXT:    paddw %xmm0, %xmm3
-; X64-NEXT:    pxor %xmm2, %xmm2
-; X64-NEXT:    pxor %xmm0, %xmm0
-; X64-NEXT:    psubw %xmm3, %xmm0
-; X64-NEXT:    movdqa %xmm1, %xmm3
-; X64-NEXT:    psllw $3, %xmm3
-; X64-NEXT:    paddw %xmm1, %xmm3
-; X64-NEXT:    psubw %xmm3, %xmm2
-; X64-NEXT:    movdqa %xmm2, %xmm1
+; X64-NEXT:    movdqa {{.*#+}} xmm2 = [65527,65527,65527,65527,65527,65527,65527,65527]
+; X64-NEXT:    pmullw %xmm2, %xmm0
+; X64-NEXT:    pmullw %xmm2, %xmm1
 ; X64-NEXT:    retq
 ;
 ; X64-XOP-LABEL: mul_v16i16_neg9:



