[llvm] r273617 - Codegen: [X86] preserve memory refs for folded umul_lohi

Thu Jun 23 14:40:36 PDT 2016

Author: iteratee
Date: Thu Jun 23 16:40:35 2016
New Revision: 273617

URL: http://llvm.org/viewvc/llvm-project?rev=273617&view=rev
Log:
Codegen: [X86] preserve memory refs for folded umul_lohi

Memory references were not being propagated for this folded load. This
prevented optimizations like LICM from hoisting the load.

Added test to verify that this allows LICM to proceed.

Modified:
    llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp
    llvm/trunk/test/CodeGen/X86/hoist-invariant-load.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp?rev=273617&r1=273616&r2=273617&view=diff
==============================================================================

--- llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp Thu Jun 23 16:40:35 2016
@@ -2223,24 +2223,32 @@ void X86DAGToDAGISel::Select(SDNode *Nod
 
     if (foldedLoad) {
       SDValue Chain;
+      MachineSDNode *CNode = nullptr;
       SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
                         InFlag };
       if (MOpc == X86::MULX32rm || MOpc == X86::MULX64rm) {
         SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Other, MVT::Glue);
-        SDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
+        CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
         ResHi = SDValue(CNode, 0);
         ResLo = SDValue(CNode, 1);
         Chain = SDValue(CNode, 2);
         InFlag = SDValue(CNode, 3);
       } else {
         SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
-        SDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
+        CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
         Chain = SDValue(CNode, 0);
         InFlag = SDValue(CNode, 1);
       }
 
       // Update the chain.
       ReplaceUses(N1.getValue(1), Chain);
+      // Record the mem-refs
+      LoadSDNode *LoadNode = cast<LoadSDNode>(N1);
+      if (LoadNode) {
+        MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+        MemOp[0] = LoadNode->getMemOperand();
+        CNode->setMemRefs(MemOp, MemOp + 1);
+      }
     } else {
       SDValue Ops[] = { N1, InFlag };
       if (Opc == X86::MULX32rr || Opc == X86::MULX64rr) {

Modified: llvm/trunk/test/CodeGen/X86/hoist-invariant-load.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/hoist-invariant-load.ll?rev=273617&r1=273616&r2=273617&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/hoist-invariant-load.ll (original)
+++ llvm/trunk/test/CodeGen/X86/hoist-invariant-load.ll Thu Jun 23 16:40:35 2016
@@ -1,7 +1,10 @@
 ; REQUIRES: asserts
-; RUN: llc < %s -stats -O2 2>&1 | grep "2 machine-licm"
+; RUN: llc -mcpu=haswell < %s -stats -O2 2>&1 | grep "4 machine-licm.*hoisted"
+; For test:
 ; 2 invariant loads, 1 for OBJC_SELECTOR_REFERENCES_
 ; and 1 for objc_msgSend from the GOT
+; For test_multi_def:
+; 2 invariant load (full multiply, both loads should be hoisted.)
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.7.2"
@@ -29,4 +32,32 @@ for.end:
 
 declare i8* @objc_msgSend(i8*, i8*, ...) nonlazybind
 
+define void @test_multi_def(i64* dereferenceable(8) %x1,
+                            i64* dereferenceable(8) %x2,
+                            i128* %y, i64 %count) nounwind {
+entry:
+  br label %for.body
+
+for.check:
+  %inc = add nsw i64 %i, 1
+  %done = icmp sge i64 %inc, %count
+  br i1 %done, label %exit, label %for.body
+
+for.body:
+  %i = phi i64 [ 0, %entry ], [ %inc, %for.check ]
+  %x1_load = load i64, i64* %x1, align 8, !invariant.load !0
+  %x1_zext = zext i64 %x1_load to i128
+  %x2_load = load i64, i64* %x2, align 8, !invariant.load !0
+  %x2_zext = zext i64 %x2_load to i128
+  %x_prod = mul i128 %x1_zext, %x2_zext
+  %y_elem = getelementptr inbounds i128, i128* %y, i64 %i
+  %y_load = load i128, i128* %y_elem, align 8
+  %y_plus = add i128 %x_prod, %y_load
+  store i128 %y_plus, i128* %y_elem, align 8
+  br label %for.check
+
+exit:
+  ret void
+}
+
 !0 = !{}