[llvm] r234735 - [ARM] Align global variables passed to memory intrinsics

Mon Apr 13 03:47:39 PDT 2015

Author: john.brawn
Date: Mon Apr 13 05:47:39 2015
New Revision: 234735

URL: http://llvm.org/viewvc/llvm-project?rev=234735&view=rev
Log:
[ARM] Align global variables passed to memory intrinsics

Fill in the TODO in CodeGenPrepare::OptimizeCallInst so that global
variables that are passed to memory intrinsics are aligned in the same
way that allocas are.

Differential Revision: http://reviews.llvm.org/D8421

Modified:
    llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp
    llvm/trunk/test/CodeGen/ARM/memcpy-inline.ll
    llvm/trunk/test/CodeGen/ARM/memfunc.ll

Modified: llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp?rev=234735&r1=234734&r2=234735&view=diff
==============================================================================

--- llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp (original)
+++ llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp Mon Apr 13 05:47:39 2015
@@ -1288,13 +1288,25 @@ bool CodeGenPrepare::OptimizeCallInst(Ca
                      cast<PointerType>(Arg->getType())->getAddressSpace()), 0);
       Value *Val = Arg->stripAndAccumulateInBoundsConstantOffsets(*TD, Offset);
       uint64_t Offset2 = Offset.getLimitedValue();
+      if ((Offset2 & (PrefAlign-1)) != 0)
+        continue;
       AllocaInst *AI;
-      if ((Offset2 & (PrefAlign-1)) == 0 &&
-          (AI = dyn_cast<AllocaInst>(Val)) &&
+      if ((AI = dyn_cast<AllocaInst>(Val)) &&
           AI->getAlignment() < PrefAlign &&
           TD->getTypeAllocSize(AI->getAllocatedType()) >= MinSize + Offset2)
         AI->setAlignment(PrefAlign);
-      // TODO: Also align GlobalVariables
+      // Global variables can only be aligned if they are defined in this
+      // object (i.e. they are uniquely initialized in this object), and
+      // over-aligning global variables that have an explicit section is
+      // forbidden.
+      GlobalVariable *GV;
+      if ((GV = dyn_cast<GlobalVariable>(Val)) &&
+          GV->hasUniqueInitializer() &&
+          !GV->hasSection() &&
+          GV->getAlignment() < PrefAlign &&
+          TD->getTypeAllocSize(
+            GV->getType()->getElementType()) >= MinSize + Offset2)
+        GV->setAlignment(PrefAlign);
     }
     // If this is a memcpy (or similar) then we may be able to improve the
     // alignment

Modified: llvm/trunk/test/CodeGen/ARM/memcpy-inline.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/memcpy-inline.ll?rev=234735&r1=234734&r2=234735&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/memcpy-inline.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/memcpy-inline.ll Mon Apr 13 05:47:39 2015
@@ -30,7 +30,7 @@ entry:
 define void @t1(i8* nocapture %C) nounwind {
 entry:
 ; CHECK-LABEL: t1:
-; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
+; CHECK: vld1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
 ; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
 ; CHECK: adds r0, #15
 ; CHECK: adds r1, #15
@@ -48,7 +48,7 @@ entry:
 ; CHECK: str [[REG2]], [r0, #32]
 ; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]!
 ; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]!
-; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
+; CHECK: vld1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
 ; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([36 x i8], [36 x i8]* @.str2, i64 0, i64 0), i64 36, i32 1, i1 false)
   ret void
@@ -59,7 +59,7 @@ entry:
 ; CHECK-LABEL: t3:
 ; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]!
 ; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]!
-; CHECK: vld1.8 {d{{[0-9]+}}}, [r1]
+; CHECK: vldr d{{[0-9]+}}, [r1]
 ; CHECK: vst1.8 {d{{[0-9]+}}}, [r0]
   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str3, i64 0, i64 0), i64 24, i32 1, i1 false)
   ret void
@@ -68,7 +68,7 @@ entry:
 define void @t4(i8* nocapture %C) nounwind {
 entry:
 ; CHECK-LABEL: t4:
-; CHECK: vld1.8 {[[REG3:d[0-9]+]], [[REG4:d[0-9]+]]}, [r1]
+; CHECK: vld1.64 {[[REG3:d[0-9]+]], [[REG4:d[0-9]+]]}, [r1]
 ; CHECK: vst1.8 {[[REG3]], [[REG4]]}, [r0]!
 ; CHECK: strh [[REG5:r[0-9]+]], [r0]
   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([18 x i8], [18 x i8]* @.str4, i64 0, i64 0), i64 18, i32 1, i1 false)
@@ -97,11 +97,11 @@ entry:
 define void @t6() nounwind {
 entry:
 ; CHECK-LABEL: t6:
-; CHECK: vld1.8 {[[REG9:d[0-9]+]]}, [r0]
+; CHECK: vldr [[REG9:d[0-9]+]], [r0]
 ; CHECK: vstr [[REG9]], [r1]
 ; CHECK: adds r1, #6
 ; CHECK: adds r0, #6
-; CHECK: vld1.8
+; CHECK: vld1.16
 ; CHECK: vst1.16
 ; CHECK-T1-LABEL: t6:
 ; CHECK-T1: movs [[TREG5:r[0-9]]],

Modified: llvm/trunk/test/CodeGen/ARM/memfunc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/memfunc.ll?rev=234735&r1=234734&r2=234735&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/memfunc.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/memfunc.ll Mon Apr 13 05:47:39 2015
@@ -3,22 +3,19 @@
 ; RUN: llc < %s -mtriple=arm-none-eabi -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-EABI --check-prefix=CHECK
 ; RUN: llc < %s -mtriple=arm-none-eabihf -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-EABI --check-prefix=CHECK
 
-@from = common global [500 x i32] zeroinitializer, align 4
-@to = common global [500 x i32] zeroinitializer, align 4
-
-define void @f1() {
+define void @f1(i8* %dest, i8* %src) {
 entry:
   ; CHECK-LABEL: f1
 
   ; CHECK-IOS: memmove
   ; CHECK-DARWIN: memmove
   ; CHECK-EABI: __aeabi_memmove
-  call void @llvm.memmove.p0i8.p0i8.i32(i8* bitcast ([500 x i32]* @from to i8*), i8* bitcast ([500 x i32]* @to to i8*), i32 500, i32 0, i1 false)
+  call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i32 0, i1 false)
 
   ; CHECK-IOS: memcpy
   ; CHECK-DARWIN: memcpy
   ; CHECK-EABI: __aeabi_memcpy
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast ([500 x i32]* @from to i8*), i8* bitcast ([500 x i32]* @to to i8*), i32 500, i32 0, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i32 0, i1 false)
 
   ; EABI memset swaps arguments
   ; CHECK-IOS: mov r1, #0
@@ -27,7 +24,7 @@ entry:
   ; CHECK-DARWIN: memset
   ; CHECK-EABI: mov r2, #0
   ; CHECK-EABI: __aeabi_memset
-  call void @llvm.memset.p0i8.i32(i8* bitcast ([500 x i32]* @from to i8*), i8 0, i32 500, i32 0, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* %dest, i8 0, i32 500, i32 0, i1 false)
   unreachable
 }
 
@@ -281,6 +278,47 @@ entry:
   unreachable
 }
 
+; Check that global variables are aligned if they are large enough, but only if
+; they are defined in this object and don't have an explicit section.
+@arr1 = global [7 x i8] c"\01\02\03\04\05\06\07", align 1
+@arr2 = global [8 x i8] c"\01\02\03\04\05\06\07\08", align 1
+@arr3 = global [7 x i8] c"\01\02\03\04\05\06\07", section "foo,bar", align 1
+@arr4 = global [8 x i8] c"\01\02\03\04\05\06\07\08", section "foo,bar", align 1
+@arr5 = weak global [7 x i8] c"\01\02\03\04\05\06\07", align 1
+@arr6 = weak_odr global [7 x i8] c"\01\02\03\04\05\06\07", align 1
+@arr7 = external global [7 x i8], align 1
+define void @f9(i8* %dest, i32 %n) {
+entry:
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @arr1, i32 0, i32 0), i32 %n, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([8 x i8], [8 x i8]* @arr2, i32 0, i32 0), i32 %n, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @arr3, i32 0, i32 0), i32 %n, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([8 x i8], [8 x i8]* @arr4, i32 0, i32 0), i32 %n, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @arr5, i32 0, i32 0), i32 %n, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @arr6, i32 0, i32 0), i32 %n, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @arr7, i32 0, i32 0), i32 %n, i32 1, i1 false)
+
+  unreachable
+}
+
+; CHECK: {{\.data|\.section.+data}}
+; CHECK-NOT: .align
+; CHECK: arr1:
+; CHECK-IOS: .align 3
+; CHECK-DARWIN: .align 2
+; CHECK-EABI: .align 2
+; CHECK: arr2:
+; CHECK: {{\.section.+foo,bar}}
+; CHECK-NOT: .align
+; CHECK: arr3:
+; CHECK-NOT: .align
+; CHECK: arr4:
+; CHECK: {{\.data|\.section.+data}}
+; CHECK-NOT: .align
+; CHECK: arr5:
+; CHECK-NOT: .align
+; CHECK: arr6:
+; CHECK-NOT: arr7:
+
 declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
 declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
 declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind