[PATCH] D33328: [CodeGen] Pessimize aliasing for union members (and may-alias objects)

Thu May 18 11:22:37 PDT 2017

kparzysz created this revision.

Use the TBAA info of the omnipotent char for accesses to these objects, so that such accesses are treated as potentially aliasing accesses of any other type.

This works for the C testcase included in the patch, but does not work for the following C++ testcase:

  // Testcase from llvm.org/PR32056
  
  extern "C" int printf (const char *__restrict __format, ...);
  
  typedef double __m256d __attribute__((__vector_size__(32)));
  
  static __inline __m256d __attribute__((__always_inline__, __nodebug__,
                                         __target__("avx")))
  _mm256_setr_pd(double __a, double __b, double __c, double __d) {
    return (__m256d){ __a, __b, __c, __d };
  }
  
  struct A {
    A () {
      a = _mm256_setr_pd(0.0, 1.0, 2.0, 3.0);
      b = _mm256_setr_pd(4.0, 5.0, 6.0, 7.0);
    }
  
    const double *begin() { return c; }
    const double *end() { return c+8; }
  
    union {
      struct { __m256d a, b; };
      double c[8];
    };
  };
  
  int main(int argc, char *argv[]) {
    A a;
    for (double value : a)
      printf("%f ", value);
    return 0;
  }

The interesting functions (`A::A` and `_mm256_setr_pd`) are:

  ; Function Attrs: nounwind
  define linkonce_odr void @_ZN1AC2Ev(%struct.A* %this) unnamed_addr #2 comdat align 2 {
  entry:
    %this.addr = alloca %struct.A*, align 8
    store %struct.A* %this, %struct.A** %this.addr, align 8, !tbaa !5
    %this1 = load %struct.A*, %struct.A** %this.addr, align 8
    %call = call <4 x double> @_ZL14_mm256_setr_pddddd(double 0.000000e+00, double 1.000000e+00, double 2.000000e+00, double 3.000000e+00)
    %0 = getelementptr inbounds %struct.A, %struct.A* %this1, i32 0, i32 0
    %1 = bitcast %union.anon* %0 to %struct.anon*
    %a = getelementptr inbounds %struct.anon, %struct.anon* %1, i32 0, i32 0
    store <4 x double> %call, <4 x double>* %a, align 32, !tbaa !10
    %call2 = call <4 x double> @_ZL14_mm256_setr_pddddd(double 4.000000e+00, double 5.000000e+00, double 6.000000e+00, double 7.000000e+00)
    %2 = getelementptr inbounds %struct.A, %struct.A* %this1, i32 0, i32 0
    %3 = bitcast %union.anon* %2 to %struct.anon*
    %b = getelementptr inbounds %struct.anon, %struct.anon* %3, i32 0, i32 1
    store <4 x double> %call2, <4 x double>* %b, align 32, !tbaa !12
    ret void
  }
  
  ; Function Attrs: alwaysinline nounwind
  define internal <4 x double> @_ZL14_mm256_setr_pddddd(double %__a, double %__b, double %__c, double %__d) #4 {
  entry:
    %__a.addr = alloca double, align 8
    %__b.addr = alloca double, align 8
    %__c.addr = alloca double, align 8
    %__d.addr = alloca double, align 8
    %.compoundliteral = alloca <4 x double>, align 32
    store double %__a, double* %__a.addr, align 8, !tbaa !8
    store double %__b, double* %__b.addr, align 8, !tbaa !8
    store double %__c, double* %__c.addr, align 8, !tbaa !8
    store double %__d, double* %__d.addr, align 8, !tbaa !8
    %0 = load double, double* %__a.addr, align 8, !tbaa !8
    %vecinit = insertelement <4 x double> undef, double %0, i32 0
    %1 = load double, double* %__b.addr, align 8, !tbaa !8
    %vecinit1 = insertelement <4 x double> %vecinit, double %1, i32 1
    %2 = load double, double* %__c.addr, align 8, !tbaa !8
    %vecinit2 = insertelement <4 x double> %vecinit1, double %2, i32 2
    %3 = load double, double* %__d.addr, align 8, !tbaa !8
    %vecinit3 = insertelement <4 x double> %vecinit2, double %3, i32 3
    store <4 x double> %vecinit3, <4 x double>* %.compoundliteral, align 32, !tbaa !7
    %4 = load <4 x double>, <4 x double>* %.compoundliteral, align 32, !tbaa !7
    ret <4 x double> %4
  }
  
  !0 = !{!"clang version 5.0.0 (http://llvm.org/git/clang.git aa148e69595b17752b15b62d9b456b586b1784bd) (http://llvm.org/git/llvm.git dcaf742ea29412ae8ee0d2daf88bfd5d71253038)"}
  !1 = !{!2, !2, i64 0}
  !2 = !{!"int", !3, i64 0}
  !3 = !{!"omnipotent char", !4, i64 0}
  !4 = !{!"Simple C++ TBAA"}
  !5 = !{!6, !6, i64 0}
  !6 = !{!"any pointer", !3, i64 0}
  !7 = !{!3, !3, i64 0}
  !8 = !{!9, !9, i64 0}
  !9 = !{!"double", !3, i64 0}
  !10 = !{!11, !3, i64 0}
  !11 = !{!"_ZTSN1AUt_Ut_E", !3, i64 0, !3, i64 32}
  !12 = !{!11, !3, i64 32}

With this patch, the stores to "double" still do not alias with the stores to <4 x double>. Based on the previous discussions, it seems that we want such aliasing to exist, but I'm not sure how it should be represented.

The final code after all optimizations clearly shows junk getting printed (note the `double undef` argument passed to the sixth `printf` call):

  define i32 @main(i32 %argc, i8** nocapture readnone %argv) local_unnamed_addr #0 {
  entry:
    %a = alloca %struct.A, align 32
    %0 = bitcast %struct.A* %a to i8*
    call void @llvm.lifetime.start.p0i8(i64 64, i8* nonnull %0) #3
    %a.i.i = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 0, i32 0, i32 0
    store <4 x double> <double 0.000000e+00, double 1.000000e+00, double 2.000000e+00, double 3.000000e+00>, <4 x double>* %a.i.i, align 32, !tbaa !1
    %b.i.i = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 0, i32 0, i32 1
    store <4 x double> <double 4.000000e+00, double 5.000000e+00, double 6.000000e+00, double 7.000000e+00>, <4 x double>* %b.i.i, align 32, !tbaa !5
    %call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double 0.000000e+00)
    %call2.1 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double 1.000000e+00)
    %call2.2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double 2.000000e+00)
    %call2.3 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double 3.000000e+00)
    %call2.4 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double 4.000000e+00)
    %call2.5 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double undef)
    %incdec.ptr.5 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 0, i32 0, i32 0, i64 6
    %1 = load double, double* %incdec.ptr.5, align 16, !tbaa !6
    %call2.6 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double %1)
    %incdec.ptr.6 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 0, i32 0, i32 0, i64 7
    %2 = load double, double* %incdec.ptr.6, align 8, !tbaa !6
    %call2.7 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double %2)
    call void @llvm.lifetime.end.p0i8(i64 64, i8* nonnull %0) #3
    ret i32 0
  }


Repository:
  rL LLVM

https://reviews.llvm.org/D33328

Files:
  lib/CodeGen/CGExpr.cpp
  test/CodeGen/union-tbaa1.c


Index: test/CodeGen/union-tbaa1.c
===================================================================

--- /dev/null
+++ test/CodeGen/union-tbaa1.c
@@ -0,0 +1,44 @@
+// RUN: %clang_cc1 %s -triple hexagon-unknown-elf -O2 -emit-llvm -o - | FileCheck %s
+
+typedef union __attribute__((aligned(4))) {
+  unsigned short uh[2];
+  unsigned uw;
+} vect32;
+
+void bar(vect32 p[][2]);
+
+// CHECK-LABEL: define void @fred
+void fred(unsigned Num, int Vec[2], int *Index, int Arr[4][2]) {
+  vect32 Tmp[4][2];
+// Generate tbaa for the load of Index:
+// CHECK: load i32, i32* %Index{{.*}}tbaa
+// And omnipotent-char tbaa for the two stores:
+// CHECK: %uw[[UW1:[0-9]*]] = getelementptr
+// CHECK: store{{.*}}%uw[[UW1]]
+// CHECK: tbaa ![[OCPATH:[0-9]+]]
+// There will be a load after the store, and it will use tbaa. Match the load
+// so that the following checks begin after it:
+// CHECK: load
+  Tmp[*Index][0].uw = Arr[*Index][0] * Num;
+// CHECK: %uw[[UW2:[0-9]*]] = getelementptr
+// CHECK: store{{.*}}%uw[[UW2]]
+// CHECK: tbaa ![[OCPATH]]
+  Tmp[*Index][1].uw = Arr[*Index][1] * Num;
+// Same here, use the omnipotent-char tbaa for the loads:
+// CHECK: %uh[[UH1:[0-9]*]] = bitcast %union.vect32
+// CHECK: %arrayidx[[AX1:[0-9]*]] = getelementptr{{.*}}%uh[[UH1]]
+// CHECK: load i16, i16* %arrayidx[[AX1]]
+// CHECK: tbaa ![[OCPATH]]
+// CHECK: store
+  Vec[0] = Tmp[*Index][0].uh[1];
+// CHECK: %uh[[UH2:[0-9]*]] = bitcast %union.vect32
+// CHECK: %arrayidx[[AX2:[0-9]*]] = getelementptr{{.*}}%uh[[UH2]]
+// CHECK: load i16, i16* %arrayidx[[AX2]]
+// CHECK: tbaa ![[OCPATH]]
+// CHECK: store
+  Vec[1] = Tmp[*Index][1].uh[1];
+  bar(Tmp);
+}
+
+// CHECK: ![[CHAR:[0-9]+]] = !{!"omnipotent char"
+// CHECK: ![[OCPATH]] = !{![[CHAR]]
Index: lib/CodeGen/CGExpr.cpp
===================================================================
--- lib/CodeGen/CGExpr.cpp
+++ lib/CodeGen/CGExpr.cpp
@@ -1432,6 +1432,8 @@
     Load->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
   }
   if (TBAAInfo) {
+    if (BaseInfo.getMayAlias())
+      TBAAInfo = CGM.getTBAAInfo(getContext().CharTy);
     llvm::MDNode *TBAAPath = CGM.getTBAAStructTagInfo(TBAABaseType, TBAAInfo,
                                                       TBAAOffset);
     if (TBAAPath)
@@ -1522,6 +1524,8 @@
     Store->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
   }
   if (TBAAInfo) {
+    if (BaseInfo.getMayAlias())
+      TBAAInfo = CGM.getTBAAInfo(getContext().CharTy);
     llvm::MDNode *TBAAPath = CGM.getTBAAStructTagInfo(TBAABaseType, TBAAInfo,
                                                       TBAAOffset);
     if (TBAAPath)
@@ -3535,6 +3539,11 @@
     getFieldAlignmentSource(BaseInfo.getAlignmentSource());
   LValueBaseInfo FieldBaseInfo(fieldAlignSource, BaseInfo.getMayAlias());
 
+  const RecordDecl *rec = field->getParent();
+  bool mayAlias = rec->isUnion() || rec->hasAttr<MayAliasAttr>();
+  if (mayAlias)
+    FieldBaseInfo.setMayAlias(true);
+
   if (field->isBitField()) {
     const CGRecordLayout &RL =
       CGM.getTypes().getCGRecordLayout(field->getParent());
@@ -3556,11 +3565,7 @@
     return LValue::MakeBitfield(Addr, Info, fieldType, FieldBaseInfo);
   }
 
-  const RecordDecl *rec = field->getParent();
   QualType type = field->getType();
-
-  bool mayAlias = rec->hasAttr<MayAliasAttr>();
-
   Address addr = base.getAddress();
   unsigned cvr = base.getVRQualifiers();
   bool TBAAPath = CGM.getCodeGenOpts().StructPathTBAA;


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D33328.99465.patch
Type: text/x-patch
Size: 3433 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20170518/bbfc9a6e/attachment.bin>