[PATCH] D24980: [GlobalOpt]: See if it's possible to shrink Stores with Undef during global variable initialization to eliminate constructor functions

Aditya Nandakumar via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 27 11:44:38 PDT 2016


aditya_nandakumar created this revision.
aditya_nandakumar added reviewers: pcc, majnemer.
aditya_nandakumar added a subscriber: llvm-commits.
aditya_nandakumar set the repository for this revision to rL LLVM.

Suppose we have the following internal global:
@_ZL9aPosition = internal addrspace(2) global [1 x <3 x float>] zeroinitializer, align 16
and its initializer function (__cxx_global_var_init) performs this store:
store <4 x float> <float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float undef>, <4 x float> addrspace(2)* bitcast ([1 x <3 x float>] addrspace(2)* @_ZL9aPosition to <4 x float> addrspace(2)*), !tbaa !7

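The evaluator currently gives up on this store because the stored type (<4 x float>) does not match the global's element type (<3 x float>). With this patch, it checks whether the extra trailing elements of the stored vector are all undef; if so, it shrinks the stored constant to the element type (here <4 x float> to <3 x float>), which makes the types compatible. The store can then be folded into the global's initializer and the constructor function eliminated.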

Looking forward to your feedback.


Repository:
  rL LLVM

https://reviews.llvm.org/D24980

Files:
  lib/Transforms/Utils/Evaluator.cpp
  test/Transforms/GlobalOpt/ShrinkUndefStore.ll

Index: test/Transforms/GlobalOpt/ShrinkUndefStore.ll
===================================================================
--- /dev/null
+++ test/Transforms/GlobalOpt/ShrinkUndefStore.ll
@@ -0,0 +1,35 @@
+; RUN: opt -o - %s -globalopt -S | FileCheck %s
+; CHECK-NOT: ctor1
+@_ZL9aPosition = internal addrspace(2) global [1 x <3 x float>] zeroinitializer, align 16
+@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @_GLOBAL__ctor1, i8* null }]
+
+; Function Attrs: nounwind optsize
+define <3 x float> @_Z4aFooi(i32 %i) #0 {
+entry:
+  %idxprom = sext i32 %i to i64
+  %arrayidx = getelementptr inbounds [1 x <3 x float>], [1 x <3 x float>] addrspace(2)* @_ZL9aPosition, i32 0, i64 %idxprom
+  %castToVec4 = bitcast <3 x float> addrspace(2)* %arrayidx to <4 x float> addrspace(2)*
+  %loadVec4 = load <4 x float>, <4 x float> addrspace(2)* %castToVec4
+  %extractVec = shufflevector <4 x float> %loadVec4, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
+  ret <3 x float> %extractVec
+}
+
+; Function Attrs: nounwind optsize
+define internal fastcc void @__cxx_global_var_init() #0 section "__TEXT,__StaticInit,regular,pure_instructions" {
+entry:
+  %arrayidx = getelementptr inbounds [1 x <3 x float>], [1 x <3 x float>] addrspace(2)* @_ZL9aPosition, i32 0, i64 0
+  store <4 x float> <float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float undef>, <4 x float> addrspace(2)* bitcast ([1 x <3 x float>] addrspace(2)* @_ZL9aPosition to <4 x float> addrspace(2)*), !tbaa !7
+  ret void
+}
+
+; Function Attrs: nounwind optsize
+define internal void @_GLOBAL__ctor1() #0 section "__TEXT,__StaticInit,regular,pure_instructions" {
+entry:
+  call fastcc void @__cxx_global_var_init()
+  ret void
+}
+
+attributes #0 = { nounwind optsize }
+!7 = !{!8, !8, i64 0}
+!8 = !{!"omnipotent char", !9, i64 0}
+!9 = !{!"Simple C/C++ TBAA"}
Index: lib/Transforms/Utils/Evaluator.cpp
===================================================================
--- lib/Transforms/Utils/Evaluator.cpp
+++ lib/Transforms/Utils/Evaluator.cpp
@@ -247,13 +247,51 @@
               IntegerType *IdxTy = IntegerType::get(NewTy->getContext(), 32);
               Constant *IdxZero = ConstantInt::get(IdxTy, 0, false);
               Constant * const IdxList[] = {IdxZero, IdxZero};
-
               Ptr = ConstantExpr::getGetElementPtr(nullptr, Ptr, IdxList);
               if (auto *FoldedPtr = ConstantFoldConstant(Ptr, DL, TLI))
                 Ptr = FoldedPtr;
-
-            // If we can't improve the situation by introspecting NewTy,
-            // we have to give up.
+            } else if (ArrayType *ATy = dyn_cast<ArrayType>(NewTy)) {
+              // See if we can remove undefs at the end of a vector to make
+              // the types compatible.
+              // For eg.
+              // @globalvariable1 = internal global [1 x <3 x float>]
+              // zeroinitializer
+              // store <4 x float> <float 4.000000e+00, float 4.000000e+00,
+              // float 4.000000e+00, float undef>,
+              // <4 x float> * bitcast ([1 x <3 x float>] * @globalvariable1 to
+              // <4 x float> *)
+              VectorType *ElemTy = dyn_cast<VectorType>(ATy->getElementType());
+              ConstantVector *Vec = dyn_cast<ConstantVector>(Val);
+              if (Vec && ElemTy) {
+                // See if the Constant vector has undefs at the end
+                // and can be truncated to ElemTy's size.
+                unsigned ValSize = Vec->getNumOperands();
+                // Make sure we're truncating and not extending
+                if ( ValSize <= ElemTy->getNumElements())
+                  return false;
+                for (unsigned i = ValSize; i > ElemTy->getNumElements(); --i) {
+                  Value *V = Vec->getOperand(i - 1);
+                  if (!dyn_cast<UndefValue>(V))
+                    // This is not an undef and we can't shrink the store
+                    return false;
+                }
+                // We know we can shrink it to ElemTy's size without losing
+                // data.
+                SmallVector<Constant *, 4> NewVec;
+                for (unsigned i = 0; i < ElemTy->getNumElements(); ++i)
+                  NewVec.push_back(Vec->getOperand(i));
+                Val = ConstantVector::get(NewVec);
+                IntegerType *IdxTy = IntegerType::get(Val->getContext(), 32);
+                Constant *IdxZero = ConstantInt::get(IdxTy, 0, false);
+                Constant *const IdxList[] = {IdxZero, IdxZero};
+                Ptr = ConstantExpr::getGetElementPtr(nullptr, Ptr, IdxList);
+                NewTy = ElemTy;
+              } else {
+
+                DEBUG(dbgs() << "Failed to bitcast constant ptr, can not "
+                                "evaluate.\n");
+                return false;
+              }
             } else {
               DEBUG(dbgs() << "Failed to bitcast constant ptr, can not "
                     "evaluate.\n");


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D24980.72680.patch
Type: text/x-patch
Size: 5023 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20160927/17e0c6eb/attachment.bin>


More information about the llvm-commits mailing list