[PATCH] D33328: [CodeGen] Pessimize aliasing for union members (and may-alias objects)
Krzysztof Parzyszek via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Thu May 18 11:22:37 PDT 2017
kparzysz created this revision.
Use the TBAA info of the omnipotent char for these objects.
This works for the C testcase included in the patch, but it does not work for the following C++ testcase:
// Testcase from llvm.org/PR32056
// Hand-written declaration of printf (the testcase includes no headers).
extern "C" int printf (const char *__restrict __format, ...);
// 32-byte vector of doubles, declared through the vector_size attribute.
typedef double __m256d __attribute__((__vector_size__(32)));
// Returns a __m256d whose elements are, in order, __a, __b, __c and __d.
// The function is marked always_inline and nodebug, and is compiled for the
// "avx" target. The vector is built with a compound literal.
static __inline __m256d __attribute__((__always_inline__, __nodebug__,
__target__("avx")))
_mm256_setr_pd(double __a, double __b, double __c, double __d) {
return (__m256d){ __a, __b, __c, __d };
}
// Reproducer type: the anonymous union overlays two __m256d vectors (a, b)
// with an array of eight doubles (c).
struct A {
// The constructor initializes the storage through the vector members only.
A () {
a = _mm256_setr_pd(0.0, 1.0, 2.0, 3.0);
b = _mm256_setr_pd(4.0, 5.0, 6.0, 7.0);
}
// begin()/end() expose the same storage through the double array c, so a
// range-for over an A reads eight doubles through the other union member.
const double *begin() { return c; }
const double *end() { return c+8; }
union {
struct { __m256d a, b; };
double c[8];
};
};
// Constructs an A and prints every element reached through begin()/end()
// using the "%f " format. argc and argv are not used.
int main(int argc, char *argv[]) {
A a;
// The reads go through the double array c, while the constructor wrote the
// values through the vector members a and b.
for (double value : a)
printf("%f ", value);
return 0;
}
The LLVM IR generated for the interesting functions (`A::A` and `_mm256_setr_pd`) is:
; IR for the constructor A::A. It calls @_ZL14_mm256_setr_pddddd twice and
; stores the two <4 x double> results into members a and b, which are reached
; by bitcasting the union pointer to the anonymous struct type.
; The two vector stores carry the tags !10 and !12. Both use !11 as the base
; type and !3 ("omnipotent char") as the access type, at offsets 0 and 32.
; Function Attrs: nounwind
define linkonce_odr void @_ZN1AC2Ev(%struct.A* %this) unnamed_addr #2 comdat align 2 {
entry:
%this.addr = alloca %struct.A*, align 8
store %struct.A* %this, %struct.A** %this.addr, align 8, !tbaa !5
%this1 = load %struct.A*, %struct.A** %this.addr, align 8
%call = call <4 x double> @_ZL14_mm256_setr_pddddd(double 0.000000e+00, double 1.000000e+00, double 2.000000e+00, double 3.000000e+00)
%0 = getelementptr inbounds %struct.A, %struct.A* %this1, i32 0, i32 0
%1 = bitcast %union.anon* %0 to %struct.anon*
%a = getelementptr inbounds %struct.anon, %struct.anon* %1, i32 0, i32 0
; Store into member a: tag !10 (base !11, access type !3, offset 0).
store <4 x double> %call, <4 x double>* %a, align 32, !tbaa !10
%call2 = call <4 x double> @_ZL14_mm256_setr_pddddd(double 4.000000e+00, double 5.000000e+00, double 6.000000e+00, double 7.000000e+00)
%2 = getelementptr inbounds %struct.A, %struct.A* %this1, i32 0, i32 0
%3 = bitcast %union.anon* %2 to %struct.anon*
%b = getelementptr inbounds %struct.anon, %struct.anon* %3, i32 0, i32 1
; Store into member b: tag !12 (base !11, access type !3, offset 32).
store <4 x double> %call2, <4 x double>* %b, align 32, !tbaa !12
ret void
}
; IR for _mm256_setr_pd. The four double arguments are stored to allocas and
; reloaded, assembled into a <4 x double> with insertelement, written to the
; compound-literal alloca, reloaded from it and returned.
; The scalar double stores and loads use tag !8 (the "double" type). The store
; and load of the compound literal use tag !7 ("omnipotent char").
; Function Attrs: alwaysinline nounwind
define internal <4 x double> @_ZL14_mm256_setr_pddddd(double %__a, double %__b, double %__c, double %__d) #4 {
entry:
%__a.addr = alloca double, align 8
%__b.addr = alloca double, align 8
%__c.addr = alloca double, align 8
%__d.addr = alloca double, align 8
%.compoundliteral = alloca <4 x double>, align 32
store double %__a, double* %__a.addr, align 8, !tbaa !8
store double %__b, double* %__b.addr, align 8, !tbaa !8
store double %__c, double* %__c.addr, align 8, !tbaa !8
store double %__d, double* %__d.addr, align 8, !tbaa !8
%0 = load double, double* %__a.addr, align 8, !tbaa !8
%vecinit = insertelement <4 x double> undef, double %0, i32 0
%1 = load double, double* %__b.addr, align 8, !tbaa !8
%vecinit1 = insertelement <4 x double> %vecinit, double %1, i32 1
%2 = load double, double* %__c.addr, align 8, !tbaa !8
%vecinit2 = insertelement <4 x double> %vecinit1, double %2, i32 2
%3 = load double, double* %__d.addr, align 8, !tbaa !8
%vecinit3 = insertelement <4 x double> %vecinit2, double %3, i32 3
; The vector itself is accessed with the "omnipotent char" tag (!7).
store <4 x double> %vecinit3, <4 x double>* %.compoundliteral, align 32, !tbaa !7
%4 = load <4 x double>, <4 x double>* %.compoundliteral, align 32, !tbaa !7
ret <4 x double> %4
}
; Metadata referenced by the two functions above.
; Type nodes: !2 "int", !6 "any pointer" and !9 "double" each have !3
; ("omnipotent char") as their parent, and !3 hangs off the root !4.
; Access tags are written as {base type, access type, offset}.
!0 = !{!"clang version 5.0.0 (http://llvm.org/git/clang.git aa148e69595b17752b15b62d9b456b586b1784bd) (http://llvm.org/git/llvm.git dcaf742ea29412ae8ee0d2daf88bfd5d71253038)"}
!1 = !{!2, !2, i64 0}
!2 = !{!"int", !3, i64 0}
!3 = !{!"omnipotent char", !4, i64 0}
!4 = !{!"Simple C++ TBAA"}
!5 = !{!6, !6, i64 0}
!6 = !{!"any pointer", !3, i64 0}
; !7: access tag with "omnipotent char" as both base and access type.
!7 = !{!3, !3, i64 0}
; !8: access tag for plain "double" accesses.
!8 = !{!9, !9, i64 0}
!9 = !{!"double", !3, i64 0}
; !10 and !12: accesses into the struct type !11 at offsets 0 and 32, with
; "omnipotent char" as the access type.
!10 = !{!11, !3, i64 0}
; !11: struct type node with two "omnipotent char" fields, at offsets 0 and 32.
!11 = !{!"_ZTSN1AUt_Ut_E", !3, i64 0, !3, i64 32}
!12 = !{!11, !3, i64 32}
With this patch, the stores to "double" still do not alias the stores to <4 x double>. Based on the previous discussions, it seems that we want such aliasing to exist, but I'm not sure how it should be represented.
The final code after all optimizations clearly shows junk getting printed (note the `double undef` argument in the sixth printf call):
; Optimized IR for main. The two vector stores into the object remain. The
; first five printf calls pass the constants 0.0 through 4.0, the sixth passes
; undef, and the last two pass values loaded from elements 6 and 7.
; NOTE(review): the !tbaa numbers used here (!1, !5, !6) presumably refer to
; the optimized module's own metadata numbering, which is not shown in this
; message. Confirm against the full output.
define i32 @main(i32 %argc, i8** nocapture readnone %argv) local_unnamed_addr #0 {
entry:
%a = alloca %struct.A, align 32
%0 = bitcast %struct.A* %a to i8*
call void @llvm.lifetime.start.p0i8(i64 64, i8* nonnull %0) #3
%a.i.i = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 0, i32 0, i32 0
store <4 x double> <double 0.000000e+00, double 1.000000e+00, double 2.000000e+00, double 3.000000e+00>, <4 x double>* %a.i.i, align 32, !tbaa !1
%b.i.i = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 0, i32 0, i32 1
store <4 x double> <double 4.000000e+00, double 5.000000e+00, double 6.000000e+00, double 7.000000e+00>, <4 x double>* %b.i.i, align 32, !tbaa !5
%call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double 0.000000e+00)
%call2.1 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double 1.000000e+00)
%call2.2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double 2.000000e+00)
%call2.3 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double 3.000000e+00)
%call2.4 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double 4.000000e+00)
; The sixth value printed is undef instead of the stored 5.0.
%call2.5 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double undef)
; Elements 6 and 7 are still loaded from memory.
%incdec.ptr.5 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 0, i32 0, i32 0, i64 6
%1 = load double, double* %incdec.ptr.5, align 16, !tbaa !6
%call2.6 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double %1)
%incdec.ptr.6 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 0, i32 0, i32 0, i64 7
%2 = load double, double* %incdec.ptr.6, align 8, !tbaa !6
%call2.7 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), double %2)
call void @llvm.lifetime.end.p0i8(i64 64, i8* nonnull %0) #3
ret i32 0
}
Repository:
rL LLVM
https://reviews.llvm.org/D33328
Files:
lib/CodeGen/CGExpr.cpp
test/CodeGen/union-tbaa1.c
Index: test/CodeGen/union-tbaa1.c
===================================================================
--- /dev/null
+++ test/CodeGen/union-tbaa1.c
@@ -0,0 +1,44 @@
+// RUN: %clang_cc1 %s -triple hexagon-unknown-elf -O2 -emit-llvm -o - | FileCheck %s
+
+typedef union __attribute__((aligned(4))) {
+ unsigned short uh[2];
+ unsigned uw;
+} vect32;
+
+void bar(vect32 p[][2]);
+
+// CHECK-LABEL: define void @fred
+void fred(unsigned Num, int Vec[2], int *Index, int Arr[4][2]) {
+ vect32 Tmp[4][2];
+// Generate tbaa for the load of Index:
+// CHECK: load i32, i32* %Index{{.*}}tbaa
+// And omnipotent-char tbaa for the two stores:
+// CHECK: %uw[[UW1:[0-9]*]] = getelementptr
+// CHECK: store{{.*}}%uw[[UW1]]
+// CHECK: tbaa ![[OCPATH:[0-9]+]]
+// There will be a load after the store, and it will use tbaa as well. Match
+// it here so that the checks below start after it:
+// CHECK: load
+ Tmp[*Index][0].uw = Arr[*Index][0] * Num;
+// CHECK: %uw[[UW2:[0-9]*]] = getelementptr
+// CHECK: store{{.*}}%uw[[UW2]]
+// CHECK: tbaa ![[OCPATH]]
+ Tmp[*Index][1].uw = Arr[*Index][1] * Num;
+// Same here, use the omnipotent-char tbaa for the loads:
+// CHECK: %uh[[UH1:[0-9]*]] = bitcast %union.vect32
+// CHECK: %arrayidx[[AX1:[0-9]*]] = getelementptr{{.*}}%uh[[UH1]]
+// CHECK: load i16, i16* %arrayidx[[AX1]]
+// CHECK: tbaa ![[OCPATH]]
+// CHECK: store
+ Vec[0] = Tmp[*Index][0].uh[1];
+// CHECK: %uh[[UH2:[0-9]*]] = bitcast %union.vect32
+// CHECK: %arrayidx[[AX2:[0-9]*]] = getelementptr{{.*}}%uh[[UH2]]
+// CHECK: load i16, i16* %arrayidx[[AX2]]
+// CHECK: tbaa ![[OCPATH]]
+// CHECK: store
+ Vec[1] = Tmp[*Index][1].uh[1];
+ bar(Tmp);
+}
+
+// CHECK: ![[CHAR:[0-9]+]] = !{!"omnipotent char"
+// CHECK: ![[OCPATH]] = !{![[CHAR]]
Index: lib/CodeGen/CGExpr.cpp
===================================================================
--- lib/CodeGen/CGExpr.cpp
+++ lib/CodeGen/CGExpr.cpp
@@ -1432,6 +1432,8 @@
Load->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
}
if (TBAAInfo) {
+ if (BaseInfo.getMayAlias())
+ TBAAInfo = CGM.getTBAAInfo(getContext().CharTy);
llvm::MDNode *TBAAPath = CGM.getTBAAStructTagInfo(TBAABaseType, TBAAInfo,
TBAAOffset);
if (TBAAPath)
@@ -1522,6 +1524,8 @@
Store->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
}
if (TBAAInfo) {
+ if (BaseInfo.getMayAlias())
+ TBAAInfo = CGM.getTBAAInfo(getContext().CharTy);
llvm::MDNode *TBAAPath = CGM.getTBAAStructTagInfo(TBAABaseType, TBAAInfo,
TBAAOffset);
if (TBAAPath)
@@ -3535,6 +3539,11 @@
getFieldAlignmentSource(BaseInfo.getAlignmentSource());
LValueBaseInfo FieldBaseInfo(fieldAlignSource, BaseInfo.getMayAlias());
+ const RecordDecl *rec = field->getParent();
+ bool mayAlias = rec->isUnion() || rec->hasAttr<MayAliasAttr>();
+ if (mayAlias)
+ FieldBaseInfo.setMayAlias(true);
+
if (field->isBitField()) {
const CGRecordLayout &RL =
CGM.getTypes().getCGRecordLayout(field->getParent());
@@ -3556,11 +3565,7 @@
return LValue::MakeBitfield(Addr, Info, fieldType, FieldBaseInfo);
}
- const RecordDecl *rec = field->getParent();
QualType type = field->getType();
-
- bool mayAlias = rec->hasAttr<MayAliasAttr>();
-
Address addr = base.getAddress();
unsigned cvr = base.getVRQualifiers();
bool TBAAPath = CGM.getCodeGenOpts().StructPathTBAA;
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D33328.99465.patch
Type: text/x-patch
Size: 3433 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20170518/bbfc9a6e/attachment.bin>
More information about the llvm-commits
mailing list