[llvm] r348975 - [X86] Don't emit MULX by default with BMI2

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Wed Dec 12 13:21:31 PST 2018


Author: ctopper
Date: Wed Dec 12 13:21:31 2018
New Revision: 348975

URL: http://llvm.org/viewvc/llvm-project?rev=348975&view=rev
Log:
[X86] Don't emit MULX by default with BMI2

MULX has somewhat improved register allocation constraints compared to the legacy MUL instruction. Both output registers are encoded instead of fixed to EAX/EDX, but EDX is used as input. It also doesn't touch flags. Unfortunately, the encoding is longer.

Preferring it whenever BMI2 is enabled is probably not optimal. Choosing it should somehow be a function of register allocation constraints, like converting adds to three-address form. gcc and icc definitely don't pick MULX by default. Not sure what, if any, rules they have for using it.

Differential Revision: https://reviews.llvm.org/D55565

Modified:
    llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp
    llvm/trunk/test/CodeGen/X86/bmi2-x86_64.ll
    llvm/trunk/test/CodeGen/X86/bmi2.ll
    llvm/trunk/test/CodeGen/X86/i128-mul.ll
    llvm/trunk/test/CodeGen/X86/mulx32.ll
    llvm/trunk/test/CodeGen/X86/mulx64.ll
    llvm/trunk/test/CodeGen/X86/pr35636.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp?rev=348975&r1=348974&r2=348975&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp Wed Dec 12 13:21:31 2018
@@ -3410,14 +3410,11 @@ void X86DAGToDAGISel::Select(SDNode *Nod
 
     unsigned Opc, MOpc;
     bool isSigned = Opcode == ISD::SMUL_LOHI;
-    bool hasBMI2 = Subtarget->hasBMI2();
     if (!isSigned) {
       switch (NVT.SimpleTy) {
       default: llvm_unreachable("Unsupported VT!");
-      case MVT::i32: Opc = hasBMI2 ? X86::MULX32rr : X86::MUL32r;
-                     MOpc = hasBMI2 ? X86::MULX32rm : X86::MUL32m; break;
-      case MVT::i64: Opc = hasBMI2 ? X86::MULX64rr : X86::MUL64r;
-                     MOpc = hasBMI2 ? X86::MULX64rm : X86::MUL64m; break;
+      case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break;
+      case MVT::i64: Opc = X86::MUL64r; MOpc = X86::MUL64m; break;
       }
     } else {
       switch (NVT.SimpleTy) {
@@ -3438,12 +3435,6 @@ void X86DAGToDAGISel::Select(SDNode *Nod
     case X86::MUL64r:
       SrcReg = LoReg = X86::RAX; HiReg = X86::RDX;
       break;
-    case X86::MULX32rr:
-      SrcReg = X86::EDX; LoReg = HiReg = 0;
-      break;
-    case X86::MULX64rr:
-      SrcReg = X86::RDX; LoReg = HiReg = 0;
-      break;
     }
 
     SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
@@ -3457,26 +3448,15 @@ void X86DAGToDAGISel::Select(SDNode *Nod
 
     SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, SrcReg,
                                           N0, SDValue()).getValue(1);
-    SDValue ResHi, ResLo;
-
     if (foldedLoad) {
       SDValue Chain;
       MachineSDNode *CNode = nullptr;
       SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
                         InFlag };
-      if (MOpc == X86::MULX32rm || MOpc == X86::MULX64rm) {
-        SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Other, MVT::Glue);
-        CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
-        ResHi = SDValue(CNode, 0);
-        ResLo = SDValue(CNode, 1);
-        Chain = SDValue(CNode, 2);
-        InFlag = SDValue(CNode, 3);
-      } else {
-        SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
-        CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
-        Chain = SDValue(CNode, 0);
-        InFlag = SDValue(CNode, 1);
-      }
+      SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
+      CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
+      Chain = SDValue(CNode, 0);
+      InFlag = SDValue(CNode, 1);
 
       // Update the chain.
       ReplaceUses(N1.getValue(1), Chain);
@@ -3484,39 +3464,27 @@ void X86DAGToDAGISel::Select(SDNode *Nod
       CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
     } else {
       SDValue Ops[] = { N1, InFlag };
-      if (Opc == X86::MULX32rr || Opc == X86::MULX64rr) {
-        SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Glue);
-        SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
-        ResHi = SDValue(CNode, 0);
-        ResLo = SDValue(CNode, 1);
-        InFlag = SDValue(CNode, 2);
-      } else {
-        SDVTList VTs = CurDAG->getVTList(MVT::Glue);
-        SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
-        InFlag = SDValue(CNode, 0);
-      }
+      SDVTList VTs = CurDAG->getVTList(MVT::Glue);
+      SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
+      InFlag = SDValue(CNode, 0);
     }
 
     // Copy the low half of the result, if it is needed.
     if (!SDValue(Node, 0).use_empty()) {
-      if (!ResLo.getNode()) {
-        assert(LoReg && "Register for low half is not defined!");
-        ResLo = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg, NVT,
-                                       InFlag);
-        InFlag = ResLo.getValue(2);
-      }
+      assert(LoReg && "Register for low half is not defined!");
+      SDValue ResLo = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg,
+                                             NVT, InFlag);
+      InFlag = ResLo.getValue(2);
       ReplaceUses(SDValue(Node, 0), ResLo);
       LLVM_DEBUG(dbgs() << "=> "; ResLo.getNode()->dump(CurDAG);
                  dbgs() << '\n');
     }
     // Copy the high half of the result, if it is needed.
     if (!SDValue(Node, 1).use_empty()) {
-      if (!ResHi.getNode()) {
-        assert(HiReg && "Register for high half is not defined!");
-        ResHi = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, HiReg, NVT,
-                                       InFlag);
-        InFlag = ResHi.getValue(2);
-      }
+      assert(HiReg && "Register for high half is not defined!");
+      SDValue ResHi = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, HiReg,
+                                             NVT, InFlag);
+      InFlag = ResHi.getValue(2);
       ReplaceUses(SDValue(Node, 1), ResHi);
       LLVM_DEBUG(dbgs() << "=> "; ResHi.getNode()->dump(CurDAG);
                  dbgs() << '\n');

Modified: llvm/trunk/test/CodeGen/X86/bmi2-x86_64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/bmi2-x86_64.ll?rev=348975&r1=348974&r2=348975&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/bmi2-x86_64.ll (original)
+++ llvm/trunk/test/CodeGen/X86/bmi2-x86_64.ll Wed Dec 12 13:21:31 2018
@@ -68,8 +68,8 @@ define i64 @mulx64(i64 %x, i64 %y, i64*
 ; CHECK-LABEL: mulx64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movq %rdx, %rcx
-; CHECK-NEXT:    movq %rdi, %rdx
-; CHECK-NEXT:    mulxq %rsi, %rax, %rdx
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    mulq %rsi
 ; CHECK-NEXT:    movq %rdx, (%rcx)
 ; CHECK-NEXT:    retq
   %x1 = zext i64 %x to i128
@@ -86,8 +86,8 @@ define i64 @mulx64_load(i64 %x, i64* %y,
 ; CHECK-LABEL: mulx64_load:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movq %rdx, %rcx
-; CHECK-NEXT:    movq %rdi, %rdx
-; CHECK-NEXT:    mulxq (%rsi), %rax, %rdx
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    mulq (%rsi)
 ; CHECK-NEXT:    movq %rdx, (%rcx)
 ; CHECK-NEXT:    retq
   %y1 = load i64, i64* %y

Modified: llvm/trunk/test/CodeGen/X86/bmi2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/bmi2.ll?rev=348975&r1=348974&r2=348975&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/bmi2.ll (original)
+++ llvm/trunk/test/CodeGen/X86/bmi2.ll Wed Dec 12 13:21:31 2018
@@ -120,11 +120,11 @@ define i32 @mulx32(i32 %x, i32 %y, i32*
 ; X86-LABEL: mulx32:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    addl %edx, %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    addl %eax, %eax
-; X86-NEXT:    mulxl %eax, %eax, %edx
+; X86-NEXT:    addl %edx, %edx
+; X86-NEXT:    mull %edx
 ; X86-NEXT:    movl %edx, (%ecx)
 ; X86-NEXT:    retl
 ;
@@ -156,10 +156,10 @@ define i32 @mulx32_load(i32 %x, i32* %y,
 ; X86-LABEL: mulx32_load:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    addl %edx, %edx
-; X86-NEXT:    mulxl (%eax), %eax, %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    addl %eax, %eax
+; X86-NEXT:    mull (%edx)
 ; X86-NEXT:    movl %edx, (%ecx)
 ; X86-NEXT:    retl
 ;

Modified: llvm/trunk/test/CodeGen/X86/i128-mul.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/i128-mul.ll?rev=348975&r1=348974&r2=348975&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/i128-mul.ll (original)
+++ llvm/trunk/test/CodeGen/X86/i128-mul.ll Wed Dec 12 13:21:31 2018
@@ -7,108 +7,61 @@
 ; PR1198
 
 define i64 @foo(i64 %x, i64 %y) nounwind {
-; X86-NOBMI-LABEL: foo:
-; X86-NOBMI:       # %bb.0:
-; X86-NOBMI-NEXT:    pushl %ebp
-; X86-NOBMI-NEXT:    pushl %ebx
-; X86-NOBMI-NEXT:    pushl %edi
-; X86-NOBMI-NEXT:    pushl %esi
-; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %ebp
-; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; X86-NOBMI-NEXT:    movl %ecx, %eax
-; X86-NOBMI-NEXT:    mull %ebp
-; X86-NOBMI-NEXT:    movl %edx, %ebx
-; X86-NOBMI-NEXT:    movl %esi, %eax
-; X86-NOBMI-NEXT:    mull %ebp
-; X86-NOBMI-NEXT:    movl %edx, %ebp
-; X86-NOBMI-NEXT:    movl %eax, %esi
-; X86-NOBMI-NEXT:    addl %ebx, %esi
-; X86-NOBMI-NEXT:    adcl $0, %ebp
-; X86-NOBMI-NEXT:    movl %ecx, %eax
-; X86-NOBMI-NEXT:    mull %edi
-; X86-NOBMI-NEXT:    movl %edx, %ebx
-; X86-NOBMI-NEXT:    addl %esi, %eax
-; X86-NOBMI-NEXT:    adcl %ebp, %ebx
-; X86-NOBMI-NEXT:    setb %al
-; X86-NOBMI-NEXT:    movzbl %al, %ecx
-; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NOBMI-NEXT:    mull %edi
-; X86-NOBMI-NEXT:    movl %edx, %esi
-; X86-NOBMI-NEXT:    movl %eax, %ebp
-; X86-NOBMI-NEXT:    addl %ebx, %ebp
-; X86-NOBMI-NEXT:    adcl %ecx, %esi
-; X86-NOBMI-NEXT:    xorl %ecx, %ecx
-; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NOBMI-NEXT:    mull %ecx
-; X86-NOBMI-NEXT:    movl %edx, %edi
-; X86-NOBMI-NEXT:    movl %eax, %ebx
-; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NOBMI-NEXT:    mull %ecx
-; X86-NOBMI-NEXT:    addl %ebx, %eax
-; X86-NOBMI-NEXT:    adcl %edi, %edx
-; X86-NOBMI-NEXT:    addl %ebp, %eax
-; X86-NOBMI-NEXT:    adcl %esi, %edx
-; X86-NOBMI-NEXT:    popl %esi
-; X86-NOBMI-NEXT:    popl %edi
-; X86-NOBMI-NEXT:    popl %ebx
-; X86-NOBMI-NEXT:    popl %ebp
-; X86-NOBMI-NEXT:    retl
-;
-; X86-BMI-LABEL: foo:
-; X86-BMI:       # %bb.0:
-; X86-BMI-NEXT:    pushl %ebp
-; X86-BMI-NEXT:    pushl %ebx
-; X86-BMI-NEXT:    pushl %edi
-; X86-BMI-NEXT:    pushl %esi
-; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; X86-BMI-NEXT:    movl %ecx, %edx
-; X86-BMI-NEXT:    mulxl %esi, %edx, %ebx
-; X86-BMI-NEXT:    movl %eax, %edx
-; X86-BMI-NEXT:    mulxl %esi, %ebp, %eax
-; X86-BMI-NEXT:    addl %ebx, %ebp
-; X86-BMI-NEXT:    adcl $0, %eax
-; X86-BMI-NEXT:    movl %ecx, %edx
-; X86-BMI-NEXT:    mulxl %edi, %edx, %ebx
-; X86-BMI-NEXT:    addl %ebp, %edx
-; X86-BMI-NEXT:    adcl %eax, %ebx
-; X86-BMI-NEXT:    setb %al
-; X86-BMI-NEXT:    movzbl %al, %eax
-; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-BMI-NEXT:    mulxl %edi, %edi, %ebp
-; X86-BMI-NEXT:    addl %ebx, %edi
-; X86-BMI-NEXT:    adcl %eax, %ebp
-; X86-BMI-NEXT:    xorl %eax, %eax
-; X86-BMI-NEXT:    movl %esi, %edx
-; X86-BMI-NEXT:    mulxl %eax, %ebx, %esi
-; X86-BMI-NEXT:    movl %ecx, %edx
-; X86-BMI-NEXT:    mulxl %eax, %eax, %edx
-; X86-BMI-NEXT:    addl %ebx, %eax
-; X86-BMI-NEXT:    adcl %esi, %edx
-; X86-BMI-NEXT:    addl %edi, %eax
-; X86-BMI-NEXT:    adcl %ebp, %edx
-; X86-BMI-NEXT:    popl %esi
-; X86-BMI-NEXT:    popl %edi
-; X86-BMI-NEXT:    popl %ebx
-; X86-BMI-NEXT:    popl %ebp
-; X86-BMI-NEXT:    retl
-;
-; X64-NOBMI-LABEL: foo:
-; X64-NOBMI:       # %bb.0:
-; X64-NOBMI-NEXT:    movq %rdi, %rax
-; X64-NOBMI-NEXT:    mulq %rsi
-; X64-NOBMI-NEXT:    movq %rdx, %rax
-; X64-NOBMI-NEXT:    retq
-;
-; X64-BMI-LABEL: foo:
-; X64-BMI:       # %bb.0:
-; X64-BMI-NEXT:    movq %rdi, %rdx
-; X64-BMI-NEXT:    mulxq %rsi, %rcx, %rax
-; X64-BMI-NEXT:    retq
+; X86-LABEL: foo:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    mull %ebp
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    mull %ebp
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    addl %ebx, %esi
+; X86-NEXT:    adcl $0, %ebp
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %edx, %ebx
+; X86-NEXT:    addl %esi, %eax
+; X86-NEXT:    adcl %ebp, %ebx
+; X86-NEXT:    setb %al
+; X86-NEXT:    movzbl %al, %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    mull %edi
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    addl %ebx, %ebp
+; X86-NEXT:    adcl %ecx, %esi
+; X86-NEXT:    xorl %ecx, %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    addl %ebx, %eax
+; X86-NEXT:    adcl %edi, %edx
+; X86-NEXT:    addl %ebp, %eax
+; X86-NEXT:    adcl %esi, %edx
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    retl
+;
+; X64-LABEL: foo:
+; X64:       # %bb.0:
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    mulq %rsi
+; X64-NEXT:    movq %rdx, %rax
+; X64-NEXT:    retq
   %tmp0 = zext i64 %x to i128
   %tmp1 = zext i64 %y to i128
   %tmp2 = mul i128 %tmp0, %tmp1
@@ -122,236 +75,125 @@ define i64 @foo(i64 %x, i64 %y) nounwind
 ; zero-extended value.
 
 define i64 @mul1(i64 %n, i64* nocapture %z, i64* nocapture %x, i64 %y) nounwind {
-; X86-NOBMI-LABEL: mul1:
-; X86-NOBMI:       # %bb.0: # %entry
-; X86-NOBMI-NEXT:    pushl %ebp
-; X86-NOBMI-NEXT:    pushl %ebx
-; X86-NOBMI-NEXT:    pushl %edi
-; X86-NOBMI-NEXT:    pushl %esi
-; X86-NOBMI-NEXT:    subl $28, %esp
-; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NOBMI-NEXT:    orl %ecx, %eax
-; X86-NOBMI-NEXT:    je .LBB1_3
-; X86-NOBMI-NEXT:  # %bb.1: # %for.body.preheader
-; X86-NOBMI-NEXT:    xorl %eax, %eax
-; X86-NOBMI-NEXT:    xorl %edx, %edx
-; X86-NOBMI-NEXT:    xorl %ebx, %ebx
-; X86-NOBMI-NEXT:    movl $0, {{[0-9]+}}(%esp) # 4-byte Folded Spill
-; X86-NOBMI-NEXT:    .p2align 4, 0x90
-; X86-NOBMI-NEXT:  .LBB1_2: # %for.body
-; X86-NOBMI-NEXT:    # =>This Inner Loop Header: Depth=1
-; X86-NOBMI-NEXT:    movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill
-; X86-NOBMI-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
-; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NOBMI-NEXT:    movl %eax, %ecx
-; X86-NOBMI-NEXT:    movl (%eax,%ebx,8), %ebp
-; X86-NOBMI-NEXT:    movl 4(%eax,%ebx,8), %esi
-; X86-NOBMI-NEXT:    movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill
-; X86-NOBMI-NEXT:    movl %ebp, %eax
-; X86-NOBMI-NEXT:    movl %ebp, {{[0-9]+}}(%esp) # 4-byte Spill
-; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NOBMI-NEXT:    mull %ecx
-; X86-NOBMI-NEXT:    movl %edx, %edi
-; X86-NOBMI-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
-; X86-NOBMI-NEXT:    movl %esi, %eax
-; X86-NOBMI-NEXT:    mull %ecx
-; X86-NOBMI-NEXT:    movl %edx, %ecx
-; X86-NOBMI-NEXT:    movl %eax, %esi
-; X86-NOBMI-NEXT:    addl %edi, %esi
-; X86-NOBMI-NEXT:    adcl $0, %ecx
-; X86-NOBMI-NEXT:    movl %ebp, %eax
-; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NOBMI-NEXT:    mull %edx
-; X86-NOBMI-NEXT:    movl %edx, %ebp
-; X86-NOBMI-NEXT:    movl %eax, %edi
-; X86-NOBMI-NEXT:    addl %esi, %edi
-; X86-NOBMI-NEXT:    adcl %ecx, %ebp
-; X86-NOBMI-NEXT:    setb {{[0-9]+}}(%esp) # 1-byte Folded Spill
-; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
-; X86-NOBMI-NEXT:    mull {{[0-9]+}}(%esp)
-; X86-NOBMI-NEXT:    movl %edx, %ecx
-; X86-NOBMI-NEXT:    movl %eax, %esi
-; X86-NOBMI-NEXT:    addl %ebp, %esi
-; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # 1-byte Folded Reload
-; X86-NOBMI-NEXT:    adcl %eax, %ecx
-; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NOBMI-NEXT:    xorl %edx, %edx
-; X86-NOBMI-NEXT:    mull %edx
-; X86-NOBMI-NEXT:    movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill
-; X86-NOBMI-NEXT:    movl %eax, %ebp
-; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
-; X86-NOBMI-NEXT:    xorl %edx, %edx
-; X86-NOBMI-NEXT:    mull %edx
-; X86-NOBMI-NEXT:    addl %ebp, %eax
-; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %ebp
-; X86-NOBMI-NEXT:    adcl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload
-; X86-NOBMI-NEXT:    addl %esi, %eax
-; X86-NOBMI-NEXT:    adcl %ecx, %edx
-; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi # 4-byte Reload
-; X86-NOBMI-NEXT:    addl {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload
-; X86-NOBMI-NEXT:    adcl {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload
-; X86-NOBMI-NEXT:    adcl $0, %eax
-; X86-NOBMI-NEXT:    adcl $0, %edx
-; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NOBMI-NEXT:    movl %esi, (%ecx,%ebx,8)
-; X86-NOBMI-NEXT:    movl %edi, 4(%ecx,%ebx,8)
-; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NOBMI-NEXT:    movl %ecx, %edi
-; X86-NOBMI-NEXT:    addl $1, %ebx
-; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi # 4-byte Reload
-; X86-NOBMI-NEXT:    adcl $0, %esi
-; X86-NOBMI-NEXT:    movl %ebx, %ecx
-; X86-NOBMI-NEXT:    xorl %ebp, %ecx
-; X86-NOBMI-NEXT:    movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill
-; X86-NOBMI-NEXT:    xorl %edi, %esi
-; X86-NOBMI-NEXT:    orl %ecx, %esi
-; X86-NOBMI-NEXT:    jne .LBB1_2
-; X86-NOBMI-NEXT:  .LBB1_3: # %for.end
-; X86-NOBMI-NEXT:    xorl %eax, %eax
-; X86-NOBMI-NEXT:    xorl %edx, %edx
-; X86-NOBMI-NEXT:    addl $28, %esp
-; X86-NOBMI-NEXT:    popl %esi
-; X86-NOBMI-NEXT:    popl %edi
-; X86-NOBMI-NEXT:    popl %ebx
-; X86-NOBMI-NEXT:    popl %ebp
-; X86-NOBMI-NEXT:    retl
-;
-; X86-BMI-LABEL: mul1:
-; X86-BMI:       # %bb.0: # %entry
-; X86-BMI-NEXT:    pushl %ebp
-; X86-BMI-NEXT:    pushl %ebx
-; X86-BMI-NEXT:    pushl %edi
-; X86-BMI-NEXT:    pushl %esi
-; X86-BMI-NEXT:    subl $20, %esp
-; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI-NEXT:    orl %ecx, %eax
-; X86-BMI-NEXT:    je .LBB1_3
-; X86-BMI-NEXT:  # %bb.1: # %for.body.preheader
-; X86-BMI-NEXT:    xorl %ecx, %ecx
-; X86-BMI-NEXT:    xorl %edx, %edx
-; X86-BMI-NEXT:    xorl %edi, %edi
-; X86-BMI-NEXT:    movl $0, {{[0-9]+}}(%esp) # 4-byte Folded Spill
-; X86-BMI-NEXT:    .p2align 4, 0x90
-; X86-BMI-NEXT:  .LBB1_2: # %for.body
-; X86-BMI-NEXT:    # =>This Inner Loop Header: Depth=1
-; X86-BMI-NEXT:    movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill
-; X86-BMI-NEXT:    movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill
-; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI-NEXT:    movl (%eax,%edi,8), %ecx
-; X86-BMI-NEXT:    movl 4(%eax,%edi,8), %ebx
-; X86-BMI-NEXT:    movl %ebx, (%esp) # 4-byte Spill
-; X86-BMI-NEXT:    movl %ecx, %edx
-; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI-NEXT:    movl %eax, %esi
-; X86-BMI-NEXT:    mulxl %eax, %eax, %ebp
-; X86-BMI-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
-; X86-BMI-NEXT:    movl %ebx, %edx
-; X86-BMI-NEXT:    mulxl %esi, %eax, %esi
-; X86-BMI-NEXT:    addl %ebp, %eax
-; X86-BMI-NEXT:    adcl $0, %esi
-; X86-BMI-NEXT:    movl %ecx, %edx
-; X86-BMI-NEXT:    mulxl {{[0-9]+}}(%esp), %ebp, %ebx
-; X86-BMI-NEXT:    addl %eax, %ebp
-; X86-BMI-NEXT:    adcl %esi, %ebx
-; X86-BMI-NEXT:    movl (%esp), %edx # 4-byte Reload
-; X86-BMI-NEXT:    mulxl {{[0-9]+}}(%esp), %eax, %esi
-; X86-BMI-NEXT:    setb %dl
-; X86-BMI-NEXT:    addl %ebx, %eax
-; X86-BMI-NEXT:    movzbl %dl, %edx
-; X86-BMI-NEXT:    adcl %edx, %esi
-; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-BMI-NEXT:    xorl %ebx, %ebx
-; X86-BMI-NEXT:    mulxl %ebx, %ebx, %edx
-; X86-BMI-NEXT:    movl %edx, (%esp) # 4-byte Spill
-; X86-BMI-NEXT:    movl %ecx, %edx
-; X86-BMI-NEXT:    xorl %ecx, %ecx
-; X86-BMI-NEXT:    mulxl %ecx, %ecx, %edx
-; X86-BMI-NEXT:    addl %ebx, %ecx
-; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ebx
-; X86-BMI-NEXT:    adcl (%esp), %edx # 4-byte Folded Reload
-; X86-BMI-NEXT:    addl %eax, %ecx
-; X86-BMI-NEXT:    adcl %esi, %edx
-; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %esi # 4-byte Reload
-; X86-BMI-NEXT:    addl {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload
-; X86-BMI-NEXT:    adcl {{[0-9]+}}(%esp), %ebp # 4-byte Folded Reload
-; X86-BMI-NEXT:    adcl $0, %ecx
-; X86-BMI-NEXT:    adcl $0, %edx
-; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI-NEXT:    movl %esi, (%eax,%edi,8)
-; X86-BMI-NEXT:    movl %ebp, 4(%eax,%edi,8)
-; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-BMI-NEXT:    movl %eax, %esi
-; X86-BMI-NEXT:    addl $1, %edi
-; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ebp # 4-byte Reload
-; X86-BMI-NEXT:    adcl $0, %ebp
-; X86-BMI-NEXT:    movl %edi, %eax
-; X86-BMI-NEXT:    xorl %esi, %eax
-; X86-BMI-NEXT:    movl %ebp, {{[0-9]+}}(%esp) # 4-byte Spill
-; X86-BMI-NEXT:    movl %ebp, %esi
-; X86-BMI-NEXT:    xorl %ebx, %esi
-; X86-BMI-NEXT:    orl %eax, %esi
-; X86-BMI-NEXT:    jne .LBB1_2
-; X86-BMI-NEXT:  .LBB1_3: # %for.end
-; X86-BMI-NEXT:    xorl %eax, %eax
-; X86-BMI-NEXT:    xorl %edx, %edx
-; X86-BMI-NEXT:    addl $20, %esp
-; X86-BMI-NEXT:    popl %esi
-; X86-BMI-NEXT:    popl %edi
-; X86-BMI-NEXT:    popl %ebx
-; X86-BMI-NEXT:    popl %ebp
-; X86-BMI-NEXT:    retl
-;
-; X64-NOBMI-LABEL: mul1:
-; X64-NOBMI:       # %bb.0: # %entry
-; X64-NOBMI-NEXT:    testq %rdi, %rdi
-; X64-NOBMI-NEXT:    je .LBB1_3
-; X64-NOBMI-NEXT:  # %bb.1: # %for.body.preheader
-; X64-NOBMI-NEXT:    movq %rcx, %r8
-; X64-NOBMI-NEXT:    movq %rdx, %r9
-; X64-NOBMI-NEXT:    xorl %r10d, %r10d
-; X64-NOBMI-NEXT:    xorl %ecx, %ecx
-; X64-NOBMI-NEXT:    .p2align 4, 0x90
-; X64-NOBMI-NEXT:  .LBB1_2: # %for.body
-; X64-NOBMI-NEXT:    # =>This Inner Loop Header: Depth=1
-; X64-NOBMI-NEXT:    movq %r8, %rax
-; X64-NOBMI-NEXT:    mulq (%r9,%rcx,8)
-; X64-NOBMI-NEXT:    addq %r10, %rax
-; X64-NOBMI-NEXT:    adcq $0, %rdx
-; X64-NOBMI-NEXT:    movq %rax, (%rsi,%rcx,8)
-; X64-NOBMI-NEXT:    incq %rcx
-; X64-NOBMI-NEXT:    cmpq %rcx, %rdi
-; X64-NOBMI-NEXT:    movq %rdx, %r10
-; X64-NOBMI-NEXT:    jne .LBB1_2
-; X64-NOBMI-NEXT:  .LBB1_3: # %for.end
-; X64-NOBMI-NEXT:    xorl %eax, %eax
-; X64-NOBMI-NEXT:    retq
-;
-; X64-BMI-LABEL: mul1:
-; X64-BMI:       # %bb.0: # %entry
-; X64-BMI-NEXT:    testq %rdi, %rdi
-; X64-BMI-NEXT:    je .LBB1_3
-; X64-BMI-NEXT:  # %bb.1: # %for.body.preheader
-; X64-BMI-NEXT:    movq %rcx, %r8
-; X64-BMI-NEXT:    movq %rdx, %r9
-; X64-BMI-NEXT:    xorl %r10d, %r10d
-; X64-BMI-NEXT:    xorl %ecx, %ecx
-; X64-BMI-NEXT:    .p2align 4, 0x90
-; X64-BMI-NEXT:  .LBB1_2: # %for.body
-; X64-BMI-NEXT:    # =>This Inner Loop Header: Depth=1
-; X64-BMI-NEXT:    movq %r8, %rdx
-; X64-BMI-NEXT:    mulxq (%r9,%rcx,8), %rax, %rdx
-; X64-BMI-NEXT:    addq %r10, %rax
-; X64-BMI-NEXT:    adcq $0, %rdx
-; X64-BMI-NEXT:    movq %rax, (%rsi,%rcx,8)
-; X64-BMI-NEXT:    incq %rcx
-; X64-BMI-NEXT:    cmpq %rcx, %rdi
-; X64-BMI-NEXT:    movq %rdx, %r10
-; X64-BMI-NEXT:    jne .LBB1_2
-; X64-BMI-NEXT:  .LBB1_3: # %for.end
-; X64-BMI-NEXT:    xorl %eax, %eax
-; X64-BMI-NEXT:    retq
+; X86-LABEL: mul1:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    subl $28, %esp
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    orl %ecx, %eax
+; X86-NEXT:    je .LBB1_3
+; X86-NEXT:  # %bb.1: # %for.body.preheader
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:    xorl %ebx, %ebx
+; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    .p2align 4, 0x90
+; X86-NEXT:  .LBB1_2: # %for.body
+; X86-NEXT:    # =>This Inner Loop Header: Depth=1
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    movl (%eax,%ebx,8), %ebp
+; X86-NEXT:    movl 4(%eax,%ebx,8), %esi
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    mull %ecx
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    addl %edi, %esi
+; X86-NEXT:    adcl $0, %ecx
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    mull %edx
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    addl %esi, %edi
+; X86-NEXT:    adcl %ecx, %ebp
+; X86-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    mull {{[0-9]+}}(%esp)
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    addl %ebp, %esi
+; X86-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; X86-NEXT:    adcl %eax, %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:    mull %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, %ebp
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:    mull %edx
+; X86-NEXT:    addl %ebp, %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    addl %esi, %eax
+; X86-NEXT:    adcl %ecx, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT:    adcl $0, %eax
+; X86-NEXT:    adcl $0, %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %esi, (%ecx,%ebx,8)
+; X86-NEXT:    movl %edi, 4(%ecx,%ebx,8)
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, %edi
+; X86-NEXT:    addl $1, %ebx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT:    adcl $0, %esi
+; X86-NEXT:    movl %ebx, %ecx
+; X86-NEXT:    xorl %ebp, %ecx
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    xorl %edi, %esi
+; X86-NEXT:    orl %ecx, %esi
+; X86-NEXT:    jne .LBB1_2
+; X86-NEXT:  .LBB1_3: # %for.end
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:    addl $28, %esp
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    retl
+;
+; X64-LABEL: mul1:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    testq %rdi, %rdi
+; X64-NEXT:    je .LBB1_3
+; X64-NEXT:  # %bb.1: # %for.body.preheader
+; X64-NEXT:    movq %rcx, %r8
+; X64-NEXT:    movq %rdx, %r9
+; X64-NEXT:    xorl %r10d, %r10d
+; X64-NEXT:    xorl %ecx, %ecx
+; X64-NEXT:    .p2align 4, 0x90
+; X64-NEXT:  .LBB1_2: # %for.body
+; X64-NEXT:    # =>This Inner Loop Header: Depth=1
+; X64-NEXT:    movq %r8, %rax
+; X64-NEXT:    mulq (%r9,%rcx,8)
+; X64-NEXT:    addq %r10, %rax
+; X64-NEXT:    adcq $0, %rdx
+; X64-NEXT:    movq %rax, (%rsi,%rcx,8)
+; X64-NEXT:    incq %rcx
+; X64-NEXT:    cmpq %rcx, %rdi
+; X64-NEXT:    movq %rdx, %r10
+; X64-NEXT:    jne .LBB1_2
+; X64-NEXT:  .LBB1_3: # %for.end
+; X64-NEXT:    xorl %eax, %eax
+; X64-NEXT:    retq
 entry:
   %conv = zext i64 %y to i128
   %cmp11 = icmp eq i64 %n, 0

Modified: llvm/trunk/test/CodeGen/X86/mulx32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/mulx32.ll?rev=348975&r1=348974&r2=348975&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/mulx32.ll (original)
+++ llvm/trunk/test/CodeGen/X86/mulx32.ll Wed Dec 12 13:21:31 2018
@@ -5,8 +5,8 @@
 define i64 @f1(i32 %a, i32 %b) {
 ; CHECK-LABEL: f1:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; CHECK-NEXT:    mulxl {{[0-9]+}}(%esp), %eax, %edx
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    mull {{[0-9]+}}(%esp)
 ; CHECK-NEXT:    retl
   %x = zext i32 %a to i64
   %y = zext i32 %b to i64
@@ -17,9 +17,9 @@ define i64 @f1(i32 %a, i32 %b) {
 define i64 @f2(i32 %a, i32* %p) {
 ; CHECK-LABEL: f2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT:    mulxl (%eax), %eax, %edx
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT:    mull (%ecx)
 ; CHECK-NEXT:    retl
   %b = load i32, i32* %p
   %x = zext i32 %a to i64

Modified: llvm/trunk/test/CodeGen/X86/mulx64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/mulx64.ll?rev=348975&r1=348974&r2=348975&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/mulx64.ll (original)
+++ llvm/trunk/test/CodeGen/X86/mulx64.ll Wed Dec 12 13:21:31 2018
@@ -5,8 +5,8 @@
 define i128 @f1(i64 %a, i64 %b) {
 ; CHECK-LABEL: f1:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movq %rdi, %rdx
-; CHECK-NEXT:    mulxq %rsi, %rax, %rdx
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    mulq %rsi
 ; CHECK-NEXT:    retq
   %x = zext i64 %a to i128
   %y = zext i64 %b to i128
@@ -17,8 +17,8 @@ define i128 @f1(i64 %a, i64 %b) {
 define i128 @f2(i64 %a, i64* %p) {
 ; CHECK-LABEL: f2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movq %rdi, %rdx
-; CHECK-NEXT:    mulxq (%rsi), %rax, %rdx
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    mulq (%rsi)
 ; CHECK-NEXT:    retq
   %b = load i64, i64* %p
   %x = zext i64 %a to i128

Modified: llvm/trunk/test/CodeGen/X86/pr35636.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pr35636.ll?rev=348975&r1=348974&r2=348975&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pr35636.ll (original)
+++ llvm/trunk/test/CodeGen/X86/pr35636.ll Wed Dec 12 13:21:31 2018
@@ -5,11 +5,11 @@
 define void @_Z15uint64_to_asciimPc(i64 %arg) {
 ; HSW-LABEL: _Z15uint64_to_asciimPc:
 ; HSW:       # %bb.0: # %bb
-; HSW-NEXT:    movabsq $811296384146066817, %rax # imm = 0xB424DC35095CD81
-; HSW-NEXT:    movq %rdi, %rdx
-; HSW-NEXT:    mulxq %rax, %rax, %rcx
-; HSW-NEXT:    shrq $42, %rcx
-; HSW-NEXT:    imulq $281474977, %rcx, %rax # imm = 0x10C6F7A1
+; HSW-NEXT:    movq %rdi, %rax
+; HSW-NEXT:    movabsq $811296384146066817, %rcx # imm = 0xB424DC35095CD81
+; HSW-NEXT:    mulq %rcx
+; HSW-NEXT:    shrq $42, %rdx
+; HSW-NEXT:    imulq $281474977, %rdx, %rax # imm = 0x10C6F7A1
 ; HSW-NEXT:    shrq $20, %rax
 ; HSW-NEXT:    leal (%rax,%rax,4), %eax
 ; HSW-NEXT:    addl $5, %eax
@@ -22,11 +22,11 @@ define void @_Z15uint64_to_asciimPc(i64
 ;
 ; ZN-LABEL: _Z15uint64_to_asciimPc:
 ; ZN:       # %bb.0: # %bb
-; ZN-NEXT:    movabsq $811296384146066817, %rax # imm = 0xB424DC35095CD81
-; ZN-NEXT:    movq %rdi, %rdx
-; ZN-NEXT:    mulxq %rax, %rax, %rcx
-; ZN-NEXT:    shrq $42, %rcx
-; ZN-NEXT:    imulq $281474977, %rcx, %rax # imm = 0x10C6F7A1
+; ZN-NEXT:    movq %rdi, %rax
+; ZN-NEXT:    movabsq $811296384146066817, %rcx # imm = 0xB424DC35095CD81
+; ZN-NEXT:    mulq %rcx
+; ZN-NEXT:    shrq $42, %rdx
+; ZN-NEXT:    imulq $281474977, %rdx, %rax # imm = 0x10C6F7A1
 ; ZN-NEXT:    shrq $20, %rax
 ; ZN-NEXT:    leal 5(%rax,%rax,4), %eax
 ; ZN-NEXT:    andl $134217727, %eax # imm = 0x7FFFFFF




More information about the llvm-commits mailing list