[llvm-commits] [llvm] r153617 - in /llvm/trunk: lib/Target/X86/X86ISelDAGToDAG.cpp test/CodeGen/X86/dec-eflags-lower.ll test/CodeGen/X86/rd-mod-wr-eflags.ll

Joel Jones joel_k_jones at apple.com
Wed Mar 28 17:37:47 PDT 2012


Author: joel_k_jones
Date: Wed Mar 28 19:37:47 2012
New Revision: 153617

URL: http://llvm.org/viewvc/llvm-project?rev=153617&view=rev
Log:
For X86, change load/dec-or-inc/store sequences into a single dec-or-inc through memory.

This is a code change to add support for changing instruction sequences of the form:

  load
  inc/dec of 8/16/32/64 bits
  store

into the appropriate X86 inc/dec through memory instruction:

  inc[qlwb] / dec[qlwb]

The checks that were in the ISD::STORE case of X86DAGToDAGISel::Select(SDNode *Node) have been extracted to isLoadIncOrDecStore and reworked to use the better
named wrappers for getOperand(unsigned) (e.g. getOffset()); Chain.getNode() has been replaced with LoadNode.  The comments have also been expanded.

Added:
    llvm/trunk/test/CodeGen/X86/rd-mod-wr-eflags.ll
      - copied, changed from r153614, llvm/trunk/test/CodeGen/X86/dec-eflags-lower.ll
Removed:
    llvm/trunk/test/CodeGen/X86/dec-eflags-lower.ll
Modified:
    llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp

Modified: llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp?rev=153617&r1=153616&r2=153617&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp Wed Mar 28 19:37:47 2012
@@ -1847,6 +1847,86 @@
   return true;
 }
 
+/// isLoadIncOrDecStore - Check whether or not the chain ending in StoreNode
+/// is suitable for doing the {load; increment or decrement; store} to modify
+/// transformation.  Opc is the opcode of the node that produces StoredVal,
+/// the value being stored.  Returns true only if every check below passes;
+/// no node is modified.
+static bool isLoadIncOrDecStore(StoreSDNode *StoreNode, unsigned Opc,
+                                SDValue &StoredVal) {
+
+  // is the value stored the result of a DEC or INC?
+  if (!(Opc == X86ISD::DEC || Opc == X86ISD::INC)) return false;
+
+  // quick check of whether the store is simple
+  SDValue Undef = StoreNode->getOffset();
+  if (Undef->getOpcode() != ISD::UNDEF) return false;
+
+  // is the chain predecessor to the store a load?  This has to be checked
+  // before the cast<> below, which asserts when the node is not a load.
+  SDValue Chain = StoreNode->getChain();
+  if (Chain->getOpcode() != ISD::LOAD) return false;
+  LoadSDNode *LoadNode = cast<LoadSDNode>(Chain.getNode());
+
+  // is the size of the value one that we can handle? (i.e. 64, 32, 16, or 8)
+  EVT LdVT = LoadNode->getMemoryVT();
+  if (LdVT != MVT::i64 && LdVT != MVT::i32 && LdVT != MVT::i16 &&
+      LdVT != MVT::i8)
+    return false;
+
+  // is the stored value result 0 of the inc or dec?
+  if (StoredVal.getResNo() != 0) return false;
+
+  // is there exactly one use of result 0 of the inc or dec?
+  if (!StoredVal.getNode()->hasNUsesOfValue(1, 0)) return false;
+
+  // is there exactly one use of the loaded value?
+  if (!LoadNode->hasNUsesOfValue(1, 0)) return false;
+
+  // is the load the operand of the inc or dec?
+  if (StoredVal->getOperand(0).getNode() != LoadNode) return false;
+
+  //OPC_CheckPredicate, 1, // Predicate_nontemporalstore
+  if (StoreNode->isNonTemporal())
+    return false;
+
+  // is the address of the store the same as the load?
+  SDValue Address = StoreNode->getBasePtr();
+  if (LoadNode->getBasePtr() != Address ||
+      LoadNode->getOffset() != Undef)
+    return false;
+
+  // is the load non-extending and non-indexed?
+  if (!ISD::isNormalLoad(LoadNode))
+    return false;
+
+  // is the store non-extending and non-indexed?
+  if (!ISD::isNormalStore(StoreNode))
+    return false;
+
+  // check load chain has only one use (from the store)
+  return Chain.hasOneUse();
+}
+
+/// getFusedLdStOpcode - Get the appropriate X86 opcode for an in memory
+/// increment or decrement of a value of type LdVT. Opc should be X86ISD::DEC
+/// or X86ISD::INC; any Opc other than X86ISD::DEC selects the INC opcodes.
+static unsigned getFusedLdStOpcode(EVT &LdVT, unsigned Opc) {
+  if (Opc == X86ISD::DEC) {
+    if (LdVT == MVT::i64) return X86::DEC64m;
+    if (LdVT == MVT::i32) return X86::DEC32m;
+    if (LdVT == MVT::i16) return X86::DEC16m;
+    if (LdVT == MVT::i8)  return X86::DEC8m;
+  } else {
+    if (LdVT == MVT::i64) return X86::INC64m;
+    if (LdVT == MVT::i32) return X86::INC32m;
+    if (LdVT == MVT::i16) return X86::INC16m;
+    if (LdVT == MVT::i8)  return X86::INC8m;
+  }
+  // assert(0) vanishes in NDEBUG builds and would fall off the function end.
+  llvm_unreachable("unrecognized size for LdVT");
+}
+
 SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
   EVT NVT = Node->getValueType(0);
   unsigned Opc, MOpc;
@@ -2354,9 +2434,13 @@
     break;
   }
   case ISD::STORE: {
+    // Change a chain of {load; incr or dec; store} of the same value into
+    // a simple increment or decrement through memory of that value, if the
+    // uses of the modified value and its address are suitable.
     // The DEC64m tablegen pattern is currently not able to match the case where
-    // the EFLAGS on the original DEC are used.
-    // we'll need to improve tablegen to allow flags to be transferred from a
+    // the EFLAGS on the original DEC are used. (This also applies to 
+    // {INC,DEC}X{64,32,16,8}.)
+    // We'll need to improve tablegen to allow flags to be transferred from a
     // node in the pattern to the result node.  probably with a new keyword
     // for example, we have this
     // def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
@@ -2366,42 +2450,16 @@
     // def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
     //  [(store (add (loadi64 addr:$dst), -1), addr:$dst),
     //   (transferrable EFLAGS)]>;
+
     StoreSDNode *StoreNode = cast<StoreSDNode>(Node);
-    SDValue Chain = StoreNode->getOperand(0);
     SDValue StoredVal = StoreNode->getOperand(1);
-    SDValue Address = StoreNode->getOperand(2);
-    SDValue Undef = StoreNode->getOperand(3);
+    unsigned Opc = StoredVal->getOpcode();
 
-    if (StoreNode->getMemOperand()->getSize() != 8 ||
-        Undef->getOpcode() != ISD::UNDEF ||
-        Chain->getOpcode() != ISD::LOAD ||
-        StoredVal->getOpcode() != X86ISD::DEC ||
-        StoredVal.getResNo() != 0 ||
-        !StoredVal.getNode()->hasNUsesOfValue(1, 0) ||
-        !Chain.getNode()->hasNUsesOfValue(1, 0) ||
-        StoredVal->getOperand(0).getNode() != Chain.getNode())
-      break;
-
-    //OPC_CheckPredicate, 1, // Predicate_nontemporalstore
-    if (StoreNode->isNonTemporal())
-      break;
-
-    LoadSDNode *LoadNode = cast<LoadSDNode>(Chain.getNode());
-    if (LoadNode->getOperand(1) != Address ||
-        LoadNode->getOperand(2) != Undef)
-      break;
-
-    if (!ISD::isNormalLoad(LoadNode))
-      break;
-
-    if (!ISD::isNormalStore(StoreNode))
-      break;
-
-    // check load chain has only one use (from the store)
-    if (!Chain.hasOneUse())
-      break;
+    if (!isLoadIncOrDecStore(StoreNode, Opc, StoredVal)) break;
 
     // Merge the input chains if they are not intra-pattern references.
+    SDValue Chain = StoreNode->getOperand(0);
+    LoadSDNode *LoadNode = cast<LoadSDNode>(Chain.getNode());
     SDValue InputChain = LoadNode->getOperand(0);
 
     SDValue Base, Scale, Index, Disp, Segment;
@@ -2413,7 +2471,9 @@
     MemOp[0] = StoreNode->getMemOperand();
     MemOp[1] = LoadNode->getMemOperand();
     const SDValue Ops[] = { Base, Scale, Index, Disp, Segment, InputChain };
-    MachineSDNode *Result = CurDAG->getMachineNode(X86::DEC64m,
+    EVT LdVT = LoadNode->getMemoryVT();    
+    unsigned newOpc = getFusedLdStOpcode(LdVT, Opc);
+    MachineSDNode *Result = CurDAG->getMachineNode(newOpc,
                                                    Node->getDebugLoc(),
                                                    MVT::i32, MVT::Other, Ops,
                                                    array_lengthof(Ops));

Removed: llvm/trunk/test/CodeGen/X86/dec-eflags-lower.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/dec-eflags-lower.ll?rev=153616&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/dec-eflags-lower.ll (original)
+++ llvm/trunk/test/CodeGen/X86/dec-eflags-lower.ll (removed)
@@ -1,67 +0,0 @@
-; RUN: llc < %s -march=x86-64 | FileCheck %s
-
-%struct.obj = type { i64 }
-
-; CHECK: _Z7releaseP3obj
-define void @_Z7releaseP3obj(%struct.obj* nocapture %o) nounwind uwtable ssp {
-entry:
-; CHECK: decq	(%{{rdi|rcx}})
-; CHECK-NEXT: je
-  %refcnt = getelementptr inbounds %struct.obj* %o, i64 0, i32 0
-  %0 = load i64* %refcnt, align 8, !tbaa !0
-  %dec = add i64 %0, -1
-  store i64 %dec, i64* %refcnt, align 8, !tbaa !0
-  %tobool = icmp eq i64 %dec, 0
-  br i1 %tobool, label %if.end, label %return
-
-if.end:                                           ; preds = %entry
-  %1 = bitcast %struct.obj* %o to i8*
-  tail call void @free(i8* %1)
-  br label %return
-
-return:                                           ; preds = %entry, %if.end
-  ret void
-}
-
- at c = common global i64 0, align 8
- at a = common global i32 0, align 4
- at .str = private unnamed_addr constant [5 x i8] c"%ld\0A\00", align 1
- at b = common global i32 0, align 4
-
-; CHECK: test
-define i32 @test() nounwind uwtable ssp {
-entry:
-; CHECK: decq
-; CHECK-NOT: decq
-%0 = load i64* @c, align 8, !tbaa !0
-%dec.i = add nsw i64 %0, -1
-store i64 %dec.i, i64* @c, align 8, !tbaa !0
-%tobool.i = icmp ne i64 %dec.i, 0
-%lor.ext.i = zext i1 %tobool.i to i32
-store i32 %lor.ext.i, i32* @a, align 4, !tbaa !3
-%call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i64 0, i64 0), i64 %dec.i) nounwind
-ret i32 0
-}
-
-; CHECK: test2
-define i32 @test2() nounwind uwtable ssp {
-entry:
-; CHECK-NOT: decq ({{.*}})
-%0 = load i64* @c, align 8, !tbaa !0
-%dec.i = add nsw i64 %0, -1
-store i64 %dec.i, i64* @c, align 8, !tbaa !0
-%tobool.i = icmp ne i64 %0, 0
-%lor.ext.i = zext i1 %tobool.i to i32
-store i32 %lor.ext.i, i32* @a, align 4, !tbaa !3
-%call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i64 0, i64 0), i64 %dec.i) nounwind
-ret i32 0
-}
-
-declare i32 @printf(i8* nocapture, ...) nounwind
-
-declare void @free(i8* nocapture) nounwind
-
-!0 = metadata !{metadata !"long", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
-!3 = metadata !{metadata !"int", metadata !1}

Copied: llvm/trunk/test/CodeGen/X86/rd-mod-wr-eflags.ll (from r153614, llvm/trunk/test/CodeGen/X86/dec-eflags-lower.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/rd-mod-wr-eflags.ll?p2=llvm/trunk/test/CodeGen/X86/rd-mod-wr-eflags.ll&p1=llvm/trunk/test/CodeGen/X86/dec-eflags-lower.ll&r1=153614&r2=153617&rev=153617&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/dec-eflags-lower.ll (original)
+++ llvm/trunk/test/CodeGen/X86/rd-mod-wr-eflags.ll Wed Mar 28 19:37:47 2012
@@ -65,3 +65,115 @@
 !1 = metadata !{metadata !"omnipotent char", metadata !2}
 !2 = metadata !{metadata !"Simple C/C++ TBAA", null}
 !3 = metadata !{metadata !"int", metadata !1}
+
+%struct.obj2 = type { i64, i32, i16, i8 }
+
+declare void @other(%struct.obj2* ) nounwind;
+
+; CHECK: example_dec
+define void @example_dec(%struct.obj2* %o) nounwind uwtable ssp {
+; 64 bit dec: load/add -1/store of field 0 should become one decq on memory
+entry:
+  %s64 = getelementptr inbounds %struct.obj2* %o, i64 0, i32 0
+; CHECK-NOT: load (%{{rdi|rcx}})
+  %0 = load i64* %s64, align 8
+; CHECK: decq (%{{rdi|rcx}})
+  %dec = add i64 %0, -1
+  store i64 %dec, i64* %s64, align 8
+  %tobool = icmp eq i64 %dec, 0
+  br i1 %tobool, label %if.end, label %return
+
+; 32 bit dec: same pattern on field 1, expected as decl with a displacement
+if.end:
+  %s32 = getelementptr inbounds %struct.obj2* %o, i64 0, i32 1
+; CHECK-NOT: load {{[0-9][0-9]*}}(%{{rdi|rcx}})
+  %1 = load i32* %s32, align 4
+; CHECK: decl {{[0-9][0-9]*}}(%{{rdi|rcx}})
+  %dec1 = add i32 %1, -1
+  store i32 %dec1, i32* %s32, align 4
+  %tobool2 = icmp eq i32 %dec1, 0
+  br i1 %tobool2, label %if.end1, label %return
+
+; 16 bit dec: same pattern on field 2, expected as decw with a displacement
+if.end1:
+  %s16 = getelementptr inbounds %struct.obj2* %o, i64 0, i32 2
+; CHECK-NOT: load {{[0-9][0-9]*}}(%{{rdi|rcx}})
+  %2 = load i16* %s16, align 2
+; CHECK: decw {{[0-9][0-9]*}}(%{{rdi|rcx}})
+  %dec2 = add i16 %2, -1
+  store i16 %dec2, i16* %s16, align 2
+  %tobool3 = icmp eq i16 %dec2, 0
+  br i1 %tobool3, label %if.end2, label %return
+
+; 8 bit dec: same pattern on field 3, expected as decb with a displacement
+if.end2:
+  %s8 = getelementptr inbounds %struct.obj2* %o, i64 0, i32 3
+; CHECK-NOT: load {{[0-9][0-9]*}}(%{{rdi|rcx}})
+  %3 = load i8* %s8
+; CHECK: decb {{[0-9][0-9]*}}(%{{rdi|rcx}})
+  %dec3 = add i8 %3, -1
+  store i8 %dec3, i8* %s8
+  %tobool4 = icmp eq i8 %dec3, 0
+  br i1 %tobool4, label %if.end4, label %return
+
+if.end4:
+  tail call void @other(%struct.obj2* %o) nounwind
+  br label %return
+
+return:                                           ; preds = %if.end4, %if.end2, %if.end1, %if.end, %entry
+  ret void
+}
+
+; CHECK: example_inc
+define void @example_inc(%struct.obj2* %o) nounwind uwtable ssp {
+; 64 bit inc: load/add 1/store of field 0 should become one incq on memory
+entry:
+  %s64 = getelementptr inbounds %struct.obj2* %o, i64 0, i32 0
+; CHECK-NOT: load (%{{rdi|rcx}})
+  %0 = load i64* %s64, align 8
+; CHECK: incq (%{{rdi|rcx}})
+  %inc = add i64 %0, 1
+  store i64 %inc, i64* %s64, align 8
+  %tobool = icmp eq i64 %inc, 0
+  br i1 %tobool, label %if.end, label %return
+
+; 32 bit inc: same pattern on field 1, expected as incl with a displacement
+if.end:
+  %s32 = getelementptr inbounds %struct.obj2* %o, i64 0, i32 1
+; CHECK-NOT: load {{[0-9][0-9]*}}(%{{rdi|rcx}})
+  %1 = load i32* %s32, align 4
+; CHECK: incl {{[0-9][0-9]*}}(%{{rdi|rcx}})
+  %inc1 = add i32 %1, 1
+  store i32 %inc1, i32* %s32, align 4
+  %tobool2 = icmp eq i32 %inc1, 0
+  br i1 %tobool2, label %if.end1, label %return
+
+; 16 bit inc: same pattern on field 2, expected as incw with a displacement
+if.end1:
+  %s16 = getelementptr inbounds %struct.obj2* %o, i64 0, i32 2
+; CHECK-NOT: load {{[0-9][0-9]*}}(%{{rdi|rcx}})
+  %2 = load i16* %s16, align 2
+; CHECK: incw {{[0-9][0-9]*}}(%{{rdi|rcx}})
+  %inc2 = add i16 %2, 1
+  store i16 %inc2, i16* %s16, align 2
+  %tobool3 = icmp eq i16 %inc2, 0
+  br i1 %tobool3, label %if.end2, label %return
+
+; 8 bit inc: same pattern on field 3, expected as incb with a displacement
+if.end2:
+  %s8 = getelementptr inbounds %struct.obj2* %o, i64 0, i32 3
+; CHECK-NOT: load {{[0-9][0-9]*}}(%{{rdi|rcx}})
+  %3 = load i8* %s8
+; CHECK: incb {{[0-9][0-9]*}}(%{{rdi|rcx}})
+  %inc3 = add i8 %3, 1
+  store i8 %inc3, i8* %s8
+  %tobool4 = icmp eq i8 %inc3, 0
+  br i1 %tobool4, label %if.end4, label %return
+
+if.end4:
+  tail call void @other(%struct.obj2* %o) nounwind
+  br label %return
+
+return:
+  ret void
+}





More information about the llvm-commits mailing list