[llvm-commits] [llvm] r143582 - in /llvm/trunk: lib/Target/ARM/ARMISelLowering.cpp lib/Target/ARM/ARMISelLowering.h test/CodeGen/ARM/2011-10-26-memset-with-neon.ll

Lang Hames lhames at gmail.com
Wed Nov 2 15:52:45 PDT 2011


Author: lhames
Date: Wed Nov  2 17:52:45 2011
New Revision: 143582

URL: http://llvm.org/viewvc/llvm-project?rev=143582&view=rev
Log:
Try to lower memset/memcpy/memmove to vector instructions on ARM where the alignment permits.

Added:
    llvm/trunk/test/CodeGen/ARM/2011-10-26-memset-with-neon.ll
Modified:
    llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
    llvm/trunk/lib/Target/ARM/ARMISelLowering.h

Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=143582&r1=143581&r2=143582&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Wed Nov  2 17:52:45 2011
@@ -8127,6 +8127,34 @@
   }
 }
 
+static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign,
+                       unsigned AlignCheck) {
+  return ((SrcAlign == 0 || SrcAlign % AlignCheck == 0) &&
+          (DstAlign == 0 || DstAlign % AlignCheck == 0));
+}
+
+EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size,
+                                           unsigned DstAlign, unsigned SrcAlign,
+                                           bool NonScalarIntSafe,
+                                           bool MemcpyStrSrc,
+                                           MachineFunction &MF) const {
+  const Function *F = MF.getFunction();
+
+  // See if we can use NEON instructions for this...
+  if (NonScalarIntSafe &&
+      !F->hasFnAttr(Attribute::NoImplicitFloat) &&
+      Subtarget->hasNEON()) {
+    if (memOpAlign(SrcAlign, DstAlign, 16) && Size >= 16) {
+      return MVT::v4i32;
+    } else if (memOpAlign(SrcAlign, DstAlign, 8) && Size >= 8) {
+      return MVT::v2i32;
+    }
+  }
+
+  // Let the target-independent logic figure it out.
+  return MVT::Other;
+}
+
 static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
   if (V < 0)
     return false;

Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.h?rev=143582&r1=143581&r2=143582&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.h (original)
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.h Wed Nov  2 17:52:45 2011
@@ -266,9 +266,14 @@
 
     /// allowsUnalignedMemoryAccesses - Returns true if the target allows
     /// unaligned memory accesses. of the specified type.
-    /// FIXME: Add getOptimalMemOpType to implement memcpy with NEON?
     virtual bool allowsUnalignedMemoryAccesses(EVT VT) const;
 
+    virtual EVT getOptimalMemOpType(uint64_t Size,
+                                    unsigned DstAlign, unsigned SrcAlign,
+                                    bool NonScalarIntSafe,
+                                    bool MemcpyStrSrc,
+                                    MachineFunction &MF) const;
+
     /// isLegalAddressingMode - Return true if the addressing mode represented
     /// by AM is legal for this target, for a load/store of the specified type.
     virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty)const;

Added: llvm/trunk/test/CodeGen/ARM/2011-10-26-memset-with-neon.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/2011-10-26-memset-with-neon.ll?rev=143582&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/2011-10-26-memset-with-neon.ll (added)
+++ llvm/trunk/test/CodeGen/ARM/2011-10-26-memset-with-neon.ll Wed Nov  2 17:52:45 2011
@@ -0,0 +1,20 @@
+; RUN: llc -march=arm -mcpu=cortex-a8 < %s | FileCheck %s
+
+; Should trigger a NEON store.
+; CHECK: vstr.64
+define void @f_0_12(i8* nocapture %c) nounwind optsize {
+entry:
+  call void @llvm.memset.p0i8.i64(i8* %c, i8 0, i64 12, i32 8, i1 false)
+  ret void
+}
+
+; Trigger multiple NEON stores.
+; CHECK:      vstmia
+; CHECK-NEXT: vstmia
+define void @f_0_40(i8* nocapture %c) nounwind optsize {
+entry:
+  call void @llvm.memset.p0i8.i64(i8* %c, i8 0, i64 40, i32 16, i1 false)
+  ret void
+}
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind





More information about the llvm-commits mailing list