[llvm] r372535 - [X86][SelectionDAGBuilder] Move the hack for handling MMX shift by i32 intrinsics into the X86 backend.

Sun Sep 22 18:05:33 PDT 2019

Author: ctopper
Date: Sun Sep 22 18:05:33 2019
New Revision: 372535

URL: http://llvm.org/viewvc/llvm-project?rev=372535&view=rev
Log:
[X86][SelectionDAGBuilder] Move the hack for handling MMX shift by i32 intrinsics into the X86 backend.

This intrinsics should be shift by immediate, but gcc allows any
i32 scalar and clang needs to match that. So we try to detect the
non-constant case and move the data from an integer register to an
MMX register.

Previously this was done by creating a v2i32 build_vector and
bitcast in SelectionDAGBuilder. This had to be done early since
v2i32 isn't a legal type. The bitcast+build_vector would be DAG
combined to X86ISD::MMX_MOVW2D which isel will turn into a
GPR->MMX MOVD.

This commit just moves the whole thing to lowering and emits
the X86ISD::MMX_MOVW2D directly to avoid the illegal type. The
test changes just seem to be due to nodes being linearized in a
different order.

Modified:
    llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/bitcast-mmx.ll
    llvm/trunk/test/CodeGen/X86/mmx-fold-load.ll

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp?rev=372535&r1=372534&r2=372535&view=diff
==============================================================================

--- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp Sun Sep 22 18:05:33 2019
@@ -5969,65 +5969,6 @@ void SelectionDAGBuilder::visitIntrinsic
   case Intrinsic::masked_compressstore:
     visitMaskedStore(I, true /* IsCompressing */);
     return;
-  case Intrinsic::x86_mmx_pslli_w:
-  case Intrinsic::x86_mmx_pslli_d:
-  case Intrinsic::x86_mmx_pslli_q:
-  case Intrinsic::x86_mmx_psrli_w:
-  case Intrinsic::x86_mmx_psrli_d:
-  case Intrinsic::x86_mmx_psrli_q:
-  case Intrinsic::x86_mmx_psrai_w:
-  case Intrinsic::x86_mmx_psrai_d: {
-    SDValue ShAmt = getValue(I.getArgOperand(1));
-    if (isa<ConstantSDNode>(ShAmt)) {
-      visitTargetIntrinsic(I, Intrinsic);
-      return;
-    }
-    unsigned NewIntrinsic = 0;
-    EVT ShAmtVT = MVT::v2i32;
-    switch (Intrinsic) {
-    case Intrinsic::x86_mmx_pslli_w:
-      NewIntrinsic = Intrinsic::x86_mmx_psll_w;
-      break;
-    case Intrinsic::x86_mmx_pslli_d:
-      NewIntrinsic = Intrinsic::x86_mmx_psll_d;
-      break;
-    case Intrinsic::x86_mmx_pslli_q:
-      NewIntrinsic = Intrinsic::x86_mmx_psll_q;
-      break;
-    case Intrinsic::x86_mmx_psrli_w:
-      NewIntrinsic = Intrinsic::x86_mmx_psrl_w;
-      break;
-    case Intrinsic::x86_mmx_psrli_d:
-      NewIntrinsic = Intrinsic::x86_mmx_psrl_d;
-      break;
-    case Intrinsic::x86_mmx_psrli_q:
-      NewIntrinsic = Intrinsic::x86_mmx_psrl_q;
-      break;
-    case Intrinsic::x86_mmx_psrai_w:
-      NewIntrinsic = Intrinsic::x86_mmx_psra_w;
-      break;
-    case Intrinsic::x86_mmx_psrai_d:
-      NewIntrinsic = Intrinsic::x86_mmx_psra_d;
-      break;
-    default: llvm_unreachable("Impossible intrinsic");  // Can't reach here.
-    }
-
-    // The vector shift intrinsics with scalars uses 32b shift amounts but
-    // the sse2/mmx shift instructions reads 64 bits. Set the upper 32 bits
-    // to be zero.
-    // We must do this early because v2i32 is not a legal type.
-    SDValue ShOps[2];
-    ShOps[0] = ShAmt;
-    ShOps[1] = DAG.getConstant(0, sdl, MVT::i32);
-    ShAmt =  DAG.getBuildVector(ShAmtVT, sdl, ShOps);
-    EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
-    ShAmt = DAG.getNode(ISD::BITCAST, sdl, DestVT, ShAmt);
-    Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, sdl, DestVT,
-                       DAG.getConstant(NewIntrinsic, sdl, MVT::i32),
-                       getValue(I.getArgOperand(0)), ShAmt);
-    setValue(&I, Res);
-    return;
-  }
   case Intrinsic::powi:
     setValue(&I, ExpandPowI(sdl, getValue(I.getArgOperand(0)),
                             getValue(I.getArgOperand(1)), DAG));

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=372535&r1=372534&r2=372535&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sun Sep 22 18:05:33 2019
@@ -23738,6 +23738,58 @@ SDValue X86TargetLowering::LowerINTRINSI
                                                  MaskVT, Operation);
     return DAG.getMergeValues({Result0, Result1}, DL);
   }
+  case Intrinsic::x86_mmx_pslli_w:
+  case Intrinsic::x86_mmx_pslli_d:
+  case Intrinsic::x86_mmx_pslli_q:
+  case Intrinsic::x86_mmx_psrli_w:
+  case Intrinsic::x86_mmx_psrli_d:
+  case Intrinsic::x86_mmx_psrli_q:
+  case Intrinsic::x86_mmx_psrai_w:
+  case Intrinsic::x86_mmx_psrai_d: {
+    SDValue ShAmt = Op.getOperand(2);
+    // If the argument is a constant, this is fine.
+    if (isa<ConstantSDNode>(ShAmt))
+      return Op;
+
+    unsigned NewIntrinsic;
+    switch (IntNo) {
+    default: llvm_unreachable("Impossible intrinsic");  // Can't reach here.
+    case Intrinsic::x86_mmx_pslli_w:
+      NewIntrinsic = Intrinsic::x86_mmx_psll_w;
+      break;
+    case Intrinsic::x86_mmx_pslli_d:
+      NewIntrinsic = Intrinsic::x86_mmx_psll_d;
+      break;
+    case Intrinsic::x86_mmx_pslli_q:
+      NewIntrinsic = Intrinsic::x86_mmx_psll_q;
+      break;
+    case Intrinsic::x86_mmx_psrli_w:
+      NewIntrinsic = Intrinsic::x86_mmx_psrl_w;
+      break;
+    case Intrinsic::x86_mmx_psrli_d:
+      NewIntrinsic = Intrinsic::x86_mmx_psrl_d;
+      break;
+    case Intrinsic::x86_mmx_psrli_q:
+      NewIntrinsic = Intrinsic::x86_mmx_psrl_q;
+      break;
+    case Intrinsic::x86_mmx_psrai_w:
+      NewIntrinsic = Intrinsic::x86_mmx_psra_w;
+      break;
+    case Intrinsic::x86_mmx_psrai_d:
+      NewIntrinsic = Intrinsic::x86_mmx_psra_d;
+      break;
+    }
+
+    // The vector shift intrinsics with scalars uses 32b shift amounts but
+    // the sse2/mmx shift instructions reads 64 bits. Copy the 32 bits to an
+    // MMX register.
+    SDLoc DL(Op);
+    ShAmt = DAG.getNode(X86ISD::MMX_MOVW2D, DL, MVT::x86mmx, ShAmt);
+    return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Op.getValueType(),
+                       DAG.getConstant(NewIntrinsic, DL, MVT::i32),
+                       Op.getOperand(1), ShAmt);
+
+  }
   }
 }
 

Modified: llvm/trunk/test/CodeGen/X86/bitcast-mmx.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/bitcast-mmx.ll?rev=372535&r1=372534&r2=372535&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/bitcast-mmx.ll (original)
+++ llvm/trunk/test/CodeGen/X86/bitcast-mmx.ll Sun Sep 22 18:05:33 2019
@@ -34,10 +34,10 @@ define i64 @t1(i64 %x, i32 %n) nounwind
 ; X86-NEXT:    movl %esp, %ebp
 ; X86-NEXT:    andl $-8, %esp
 ; X86-NEXT:    subl $8, %esp
-; X86-NEXT:    movd 16(%ebp), %mm0
-; X86-NEXT:    movq 8(%ebp), %mm1
-; X86-NEXT:    psllq %mm0, %mm1
-; X86-NEXT:    movq %mm1, (%esp)
+; X86-NEXT:    movq 8(%ebp), %mm0
+; X86-NEXT:    movd 16(%ebp), %mm1
+; X86-NEXT:    psllq %mm1, %mm0
+; X86-NEXT:    movq %mm0, (%esp)
 ; X86-NEXT:    movl (%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    movl %ebp, %esp
@@ -46,10 +46,10 @@ define i64 @t1(i64 %x, i32 %n) nounwind
 ;
 ; X64-LABEL: t1:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    movd %esi, %mm0
-; X64-NEXT:    movq %rdi, %mm1
-; X64-NEXT:    psllq %mm0, %mm1
-; X64-NEXT:    movq %mm1, %rax
+; X64-NEXT:    movq %rdi, %mm0
+; X64-NEXT:    movd %esi, %mm1
+; X64-NEXT:    psllq %mm1, %mm0
+; X64-NEXT:    movq %mm0, %rax
 ; X64-NEXT:    retq
 entry:
   %0 = bitcast i64 %x to x86_mmx
@@ -65,11 +65,11 @@ define i64 @t2(i64 %x, i32 %n, i32 %w) n
 ; X86-NEXT:    movl %esp, %ebp
 ; X86-NEXT:    andl $-8, %esp
 ; X86-NEXT:    subl $8, %esp
-; X86-NEXT:    movd 16(%ebp), %mm0
-; X86-NEXT:    movd 20(%ebp), %mm1
-; X86-NEXT:    psllq %mm0, %mm1
-; X86-NEXT:    por 8(%ebp), %mm1
-; X86-NEXT:    movq %mm1, (%esp)
+; X86-NEXT:    movd 20(%ebp), %mm0
+; X86-NEXT:    movd 16(%ebp), %mm1
+; X86-NEXT:    psllq %mm1, %mm0
+; X86-NEXT:    por 8(%ebp), %mm0
+; X86-NEXT:    movq %mm0, (%esp)
 ; X86-NEXT:    movl (%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    movl %ebp, %esp
@@ -78,12 +78,12 @@ define i64 @t2(i64 %x, i32 %n, i32 %w) n
 ;
 ; X64-LABEL: t2:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    movd %esi, %mm0
-; X64-NEXT:    movd %edx, %mm1
-; X64-NEXT:    psllq %mm0, %mm1
-; X64-NEXT:    movq %rdi, %mm0
-; X64-NEXT:    por %mm1, %mm0
-; X64-NEXT:    movq %mm0, %rax
+; X64-NEXT:    movd %edx, %mm0
+; X64-NEXT:    movd %esi, %mm1
+; X64-NEXT:    psllq %mm1, %mm0
+; X64-NEXT:    movq %rdi, %mm1
+; X64-NEXT:    por %mm0, %mm1
+; X64-NEXT:    movq %mm1, %rax
 ; X64-NEXT:    retq
 entry:
   %0 = insertelement <2 x i32> undef, i32 %w, i32 0

Modified: llvm/trunk/test/CodeGen/X86/mmx-fold-load.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/mmx-fold-load.ll?rev=372535&r1=372534&r2=372535&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/mmx-fold-load.ll (original)
+++ llvm/trunk/test/CodeGen/X86/mmx-fold-load.ll Sun Sep 22 18:05:33 2019
@@ -585,22 +585,22 @@ define void @test_psrlq_by_volatile_shif
 ; X86-NEXT:    pushl %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl $1, (%esp)
-; X86-NEXT:    movd (%esp), %mm0
 ; X86-NEXT:    movl $255, %ecx
-; X86-NEXT:    movd %ecx, %mm1
-; X86-NEXT:    psrlq %mm0, %mm1
-; X86-NEXT:    movq %mm1, (%eax)
+; X86-NEXT:    movd %ecx, %mm0
+; X86-NEXT:    movd (%esp), %mm1
+; X86-NEXT:    psrlq %mm1, %mm0
+; X86-NEXT:    movq %mm0, (%eax)
 ; X86-NEXT:    popl %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_psrlq_by_volatile_shift_amount:
 ; X64:       # %bb.0: # %entry
 ; X64-NEXT:    movl $1, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    movd -{{[0-9]+}}(%rsp), %mm0
 ; X64-NEXT:    movl $255, %eax
-; X64-NEXT:    movd %eax, %mm1
-; X64-NEXT:    psrlq %mm0, %mm1
-; X64-NEXT:    movq %mm1, (%rdi)
+; X64-NEXT:    movd %eax, %mm0
+; X64-NEXT:    movd -{{[0-9]+}}(%rsp), %mm1
+; X64-NEXT:    psrlq %mm1, %mm0
+; X64-NEXT:    movq %mm0, (%rdi)
 ; X64-NEXT:    retq
 entry:
   %0 = alloca i32, align 4