[llvm] r326497 - [X86][MMX] Improve handling of 64-bit MMX constants

Thu Mar 1 14:22:31 PST 2018

Author: rksimon
Date: Thu Mar  1 14:22:31 2018
New Revision: 326497

URL: http://llvm.org/viewvc/llvm-project?rev=326497&view=rev
Log:
[X86][MMX] Improve handling of 64-bit MMX constants

64-bit MMX constant generation usually ends up lowering into SSE instructions before being spilled/reloaded as a MMX type.

This patch bitcasts the constant to a double value to allow correct loading directly to the MMX register.

I've added MMX constant asm comment support to improve testing, it's better to always print the double values as hex constants as MMX is mainly an integer unit (and even with 3DNow! its just floats).

Differential Revision: https://reviews.llvm.org/D43616

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/lib/Target/X86/X86MCInstLower.cpp
    llvm/trunk/test/CodeGen/X86/fast-isel-bc.ll
    llvm/trunk/test/CodeGen/X86/vector-shuffle-mmx.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=326497&r1=326496&r2=326497&view=diff
==============================================================================

--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Thu Mar  1 14:22:31 2018
@@ -30833,14 +30833,19 @@ static SDValue combineBitcast(SDNode *N,
   // it's better to handle them early to be sure we emit efficient code by
   // avoiding store-load conversions.
   if (VT == MVT::x86mmx) {
-    // Detect zero-extended MMX constant vectors.
+    // Detect MMX constant vectors.
     APInt UndefElts;
-    SmallVector<APInt, 2> EltBits;
-    if (getTargetConstantBitsFromNode(N0, 32, UndefElts, EltBits) &&
-        EltBits[1] == 0) {
+    SmallVector<APInt, 1> EltBits;
+    if (getTargetConstantBitsFromNode(N0, 64, UndefElts, EltBits)) {
       SDLoc DL(N0);
-      return DAG.getNode(X86ISD::MMX_MOVW2D, DL, VT,
-                         DAG.getConstant(EltBits[0], DL, MVT::i32));
+      // Handle zero-extension of i32 with MOVD.
+      if (EltBits[0].countLeadingZeros() >= 32)
+        return DAG.getNode(X86ISD::MMX_MOVW2D, DL, VT,
+                           DAG.getConstant(EltBits[0].trunc(32), DL, MVT::i32));
+      // Else, bitcast to a double.
+      // TODO - investigate supporting sext 32-bit immediates on x86_64.
+      APFloat F64(APFloat::IEEEdouble(), EltBits[0]);
+      return DAG.getBitcast(VT, DAG.getConstantFP(F64, DL, MVT::f64));
     }
 
     // Detect bitcasts to x86mmx low word.

Modified: llvm/trunk/lib/Target/X86/X86MCInstLower.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86MCInstLower.cpp?rev=326497&r1=326496&r2=326497&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86MCInstLower.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86MCInstLower.cpp Thu Mar  1 14:22:31 2018
@@ -1822,6 +1822,24 @@ void X86AsmPrinter::EmitInstruction(cons
     break;
   }
 
+  case X86::MMX_MOVQ64rm: {
+    if (!OutStreamer->isVerboseAsm())
+      break;
+    if (MI->getNumOperands() <= 4)
+      break;
+    if (auto *C = getConstantFromPool(*MI, MI->getOperand(4))) {
+      std::string Comment;
+      raw_string_ostream CS(Comment);
+      const MachineOperand &DstOp = MI->getOperand(0);
+      CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";
+      if (auto *CF = dyn_cast<ConstantFP>(C)) {
+        CS << "0x" << CF->getValueAPF().bitcastToAPInt().toString(16, false);
+        OutStreamer->AddComment(CS.str(), !EnablePrintSchedInfo);
+      }
+    }
+    break;
+  }
+
 #define MOV_CASE(Prefix, Suffix)        \
   case X86::Prefix##MOVAPD##Suffix##rm: \
   case X86::Prefix##MOVAPS##Suffix##rm: \

Modified: llvm/trunk/test/CodeGen/X86/fast-isel-bc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fast-isel-bc.ll?rev=326497&r1=326496&r2=326497&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fast-isel-bc.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fast-isel-bc.ll Thu Mar  1 14:22:31 2018
@@ -7,19 +7,12 @@
 declare void @func2(x86_mmx)
 
 ; This isn't spectacular, but it's MMX code at -O0...
-; For now, handling of x86_mmx parameters in fast Isel is unimplemented,
-; so we get pretty poor code.  The below is preferable.
-; CHEK: movl $2, %eax
-; CHEK: movd %rax, %mm0
-; CHEK: movd %mm0, %rdi
 
 define void @func1() nounwind {
 ; X86-LABEL: func1:
 ; X86:       ## %bb.0:
 ; X86-NEXT:    subl $12, %esp
-; X86-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
-; X86-NEXT:    movsd %xmm0, (%esp)
-; X86-NEXT:    movq (%esp), %mm0
+; X86-NEXT:    movq LCPI0_0, %mm0 ## mm0 = 0x200000000
 ; X86-NEXT:    calll _func2
 ; X86-NEXT:    addl $12, %esp
 ; X86-NEXT:    retl
@@ -27,13 +20,7 @@ define void @func1() nounwind {
 ; X64-LABEL: func1:
 ; X64:       ## %bb.0:
 ; X64-NEXT:    pushq %rax
-; X64-NEXT:    movl $2, %eax
-; X64-NEXT:    movl %eax, %ecx
-; X64-NEXT:    movq %rcx, %xmm0
-; X64-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
-; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; X64-NEXT:    movq %xmm0, (%rsp)
-; X64-NEXT:    movq (%rsp), %mm0
+; X64-NEXT:    movq {{.*}}(%rip), %mm0 ## mm0 = 0x200000000
 ; X64-NEXT:    movq2dq %mm0, %xmm0
 ; X64-NEXT:    callq _func2
 ; X64-NEXT:    popq %rax

Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-mmx.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-mmx.ll?rev=326497&r1=326496&r2=326497&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-mmx.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-mmx.ll Thu Mar  1 14:22:31 2018
@@ -33,25 +33,18 @@ define void @test1() {
 ; X32:       ## %bb.0: ## %entry
 ; X32-NEXT:    pushl %edi
 ; X32-NEXT:    .cfi_def_cfa_offset 8
-; X32-NEXT:    subl $8, %esp
-; X32-NEXT:    .cfi_def_cfa_offset 16
 ; X32-NEXT:    .cfi_offset %edi, -8
 ; X32-NEXT:    pxor %mm0, %mm0
-; X32-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
-; X32-NEXT:    movsd %xmm0, (%esp)
-; X32-NEXT:    movq (%esp), %mm1
+; X32-NEXT:    movq LCPI1_0, %mm1 ## mm1 = 0x7070606040400000
 ; X32-NEXT:    xorl %edi, %edi
 ; X32-NEXT:    maskmovq %mm1, %mm0
-; X32-NEXT:    addl $8, %esp
 ; X32-NEXT:    popl %edi
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test1:
 ; X64:       ## %bb.0: ## %entry
 ; X64-NEXT:    pxor %mm0, %mm0
-; X64-NEXT:    movq {{.*}}(%rip), %rax
-; X64-NEXT:    movq %rax, -{{[0-9]+}}(%rsp)
-; X64-NEXT:    movq -{{[0-9]+}}(%rsp), %mm1
+; X64-NEXT:    movq {{.*}}(%rip), %mm1 ## mm1 = 0x7070606040400000
 ; X64-NEXT:    xorl %edi, %edi
 ; X64-NEXT:    maskmovq %mm1, %mm0
 ; X64-NEXT:    retq