[llvm] r347966 - [X86] Change the pre-sse4.1 code in the v16i8 MULHU lowering to be what we get after DAG combine cleans it up.

Fri Nov 30 00:32:02 PST 2018

Author: ctopper
Date: Fri Nov 30 00:32:01 2018
New Revision: 347966

URL: http://llvm.org/viewvc/llvm-project?rev=347966&view=rev
Log:
[X86] Change the pre-sse4.1 code in the v16i8 MULHU lowering to be what we get after DAG combine cleans it up.

Previously we emitted a punpcklbw/punpckhbw to move the byte elements into the upper half of 16 bit elements then shifted right by 8 to zero the upper bits. After DAG combine we end up with punpcklbw/punpckhbw into the lower bits with zeros in the uppers bits and no shifts. So just emit that directly.

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=347966&r1=347965&r2=347966&view=diff
==============================================================================

--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Fri Nov 30 00:32:01 2018
@@ -23654,7 +23654,6 @@ static SDValue LowerMULH(SDValue Op, con
 
   // With SSE41 we can use sign/zero extend, but for pre-SSE41 we unpack
   // and then ashr/lshr the upper bits down to the lower bits before multiply.
-  unsigned ExShift = IsSigned ? X86ISD::VSRAI : X86ISD::VSRLI;
   unsigned ExAVX = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
 
   // For 512-bit vectors, split into 256-bit vectors to allow the
@@ -23722,15 +23721,18 @@ static SDValue LowerMULH(SDValue Op, con
   if (Subtarget.hasSSE41()) {
     ALo = DAG.getNode(ExSSE41, dl, ExVT, A);
     BLo = DAG.getNode(ExSSE41, dl, ExVT, B);
+  } else if (IsSigned) {
+    ALo = getUnpackl(DAG, dl, VT, DAG.getUNDEF(VT), A);
+    BLo = getUnpackl(DAG, dl, VT, DAG.getUNDEF(VT), B);
+    ALo = DAG.getBitcast(ExVT, ALo);
+    BLo = DAG.getBitcast(ExVT, BLo);
+    ALo = getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExVT, ALo, 8, DAG);
+    BLo = getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExVT, BLo, 8, DAG);
   } else {
-    const int ShufMask[] = {-1, 0, -1, 1, -1, 2, -1, 3,
-                            -1, 4, -1, 5, -1, 6, -1, 7};
-    ALo = DAG.getVectorShuffle(VT, dl, A, A, ShufMask);
-    BLo = DAG.getVectorShuffle(VT, dl, B, B, ShufMask);
+    ALo = getUnpackl(DAG, dl, VT, A, DAG.getConstant(0, dl, VT));
+    BLo = getUnpackl(DAG, dl, VT, B, DAG.getConstant(0, dl, VT));
     ALo = DAG.getBitcast(ExVT, ALo);
     BLo = DAG.getBitcast(ExVT, BLo);
-    ALo = getTargetVShiftByConstNode(ExShift, dl, ExVT, ALo, 8, DAG);
-    BLo = getTargetVShiftByConstNode(ExShift, dl, ExVT, BLo, 8, DAG);
   }
 
   // Extract the hi parts and zero/sign extend to i16.
@@ -23742,15 +23744,18 @@ static SDValue LowerMULH(SDValue Op, con
     BHi = DAG.getVectorShuffle(VT, dl, B, B, ShufMask);
     AHi = DAG.getNode(ExSSE41, dl, ExVT, AHi);
     BHi = DAG.getNode(ExSSE41, dl, ExVT, BHi);
+  } else if (IsSigned) {
+    AHi = getUnpackh(DAG, dl, VT, DAG.getUNDEF(VT), A);
+    BHi = getUnpackh(DAG, dl, VT, DAG.getUNDEF(VT), B);
+    AHi = DAG.getBitcast(ExVT, AHi);
+    BHi = DAG.getBitcast(ExVT, BHi);
+    AHi = getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExVT, AHi, 8, DAG);
+    BHi = getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExVT, BHi, 8, DAG);
   } else {
-    const int ShufMask[] = {-1, 8,  -1, 9,  -1, 10, -1, 11,
-                            -1, 12, -1, 13, -1, 14, -1, 15};
-    AHi = DAG.getVectorShuffle(VT, dl, A, A, ShufMask);
-    BHi = DAG.getVectorShuffle(VT, dl, B, B, ShufMask);
+    AHi = getUnpackh(DAG, dl, VT, A, DAG.getConstant(0, dl, VT));
+    BHi = getUnpackh(DAG, dl, VT, B, DAG.getConstant(0, dl, VT));
     AHi = DAG.getBitcast(ExVT, AHi);
     BHi = DAG.getBitcast(ExVT, BHi);
-    AHi = getTargetVShiftByConstNode(ExShift, dl, ExVT, AHi, 8, DAG);
-    BHi = getTargetVShiftByConstNode(ExShift, dl, ExVT, BHi, 8, DAG);
   }
 
   // Multiply, lshr the upper 8bits to the lower 8bits of the lo/hi results and