[llvm-commits] [llvm] r122707 - in /llvm/trunk: lib/CodeGen/SelectionDAG/SelectionDAG.cpp test/CodeGen/X86/memset-2.ll

Benjamin Kramer benny.kra at googlemail.com
Sun Jan 2 11:44:58 PST 2011


Author: d0k
Date: Sun Jan  2 13:44:58 2011
New Revision: 122707

URL: http://llvm.org/viewvc/llvm-project?rev=122707&view=rev
Log:
Lower the i8 extension in memset to a multiply instead of a potentially long series of shifts and ors.

We could implement a DAGCombine to turn x * 0x0101 back into logic operations
on targets that doesn't support the multiply or it is slow (p4) if someone cares
enough.

Example code:
  void test(char *s, int a) {
      __builtin_memset(s, a, 4);
  }
before:
  _test:                                  ## @test
    movzbl  8(%esp), %eax
    movl  %eax, %ecx
    shll  $8, %ecx
    orl %eax, %ecx
    movl  %ecx, %eax
    shll  $16, %eax
    orl %ecx, %eax
    movl  4(%esp), %ecx
    movl  %eax, 4(%ecx)
    movl  %eax, (%ecx)
    ret
after:
  _test:                                  ## @test
    movzbl  8(%esp), %eax
    imull $16843009, %eax, %eax   ## imm = 0x1010101
    movl  4(%esp), %ecx
    movl  %eax, 4(%ecx)
    movl  %eax, (%ecx)
    ret

Modified:
    llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
    llvm/trunk/test/CodeGen/X86/memset-2.ll

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp?rev=122707&r1=122706&r2=122707&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp Sun Jan  2 13:44:58 2011
@@ -3132,6 +3132,17 @@
                  &ArgChains[0], ArgChains.size());
 }
 
+/// SplatByte - Distribute ByteVal over NumBits bits.
+static APInt SplatByte(unsigned NumBits, uint8_t ByteVal) {
+  APInt Val = APInt(NumBits, ByteVal);
+  unsigned Shift = 8;
+  for (unsigned i = NumBits; i > 8; i >>= 1) {
+    Val = (Val << Shift) | Val;
+    Shift <<= 1;
+  }
+  return Val;
+}
+
 /// getMemsetValue - Vectorized representation of the memset value
 /// operand.
 static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG,
@@ -3140,27 +3151,18 @@
 
   unsigned NumBits = VT.getScalarType().getSizeInBits();
   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value)) {
-    APInt Val = APInt(NumBits, C->getZExtValue() & 255);
-    unsigned Shift = 8;
-    for (unsigned i = NumBits; i > 8; i >>= 1) {
-      Val = (Val << Shift) | Val;
-      Shift <<= 1;
-    }
+    APInt Val = SplatByte(NumBits, C->getZExtValue() & 255);
     if (VT.isInteger())
       return DAG.getConstant(Val, VT);
     return DAG.getConstantFP(APFloat(Val), VT);
   }
 
-  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   Value = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Value);
-  unsigned Shift = 8;
-  for (unsigned i = NumBits; i > 8; i >>= 1) {
-    Value = DAG.getNode(ISD::OR, dl, VT,
-                        DAG.getNode(ISD::SHL, dl, VT, Value,
-                                    DAG.getConstant(Shift,
-                                                    TLI.getShiftAmountTy())),
-                        Value);
-    Shift <<= 1;
+  if (NumBits > 8) {
+    // Use a multiplication with 0x010101... to extend the input to the
+    // required length.
+    APInt Magic = SplatByte(NumBits, 0x01);
+    Value = DAG.getNode(ISD::MUL, dl, VT, Value, DAG.getConstant(Magic, VT));
   }
 
   return Value;

Modified: llvm/trunk/test/CodeGen/X86/memset-2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/memset-2.ll?rev=122707&r1=122706&r2=122707&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/memset-2.ll (original)
+++ llvm/trunk/test/CodeGen/X86/memset-2.ll Sun Jan  2 13:44:58 2011
@@ -17,3 +17,14 @@
   call void @llvm.memset.i32( i8* undef, i8 %c, i32 76, i32 1 ) nounwind
   unreachable
 }
+
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
+
+define void @t3(i8* nocapture %s, i8 %a) nounwind {
+entry:
+  tail call void @llvm.memset.p0i8.i32(i8* %s, i8 %a, i32 8, i32 1, i1 false)
+  ret void
+; CHECK: t3:
+; CHECK: imull $16843009
+}
+





More information about the llvm-commits mailing list