[llvm] 1de1070 - [DAGCombine] Fix alias analysis for unaligned accesses

David Green via llvm-commits llvm-commits at lists.llvm.org
Fri Feb 28 10:46:11 PST 2020


Author: David Green
Date: 2020-02-28T18:44:36Z
New Revision: 1de10705594c7a2c9b8fde901c391bd84062ae04

URL: https://github.com/llvm/llvm-project/commit/1de10705594c7a2c9b8fde901c391bd84062ae04
DIFF: https://github.com/llvm/llvm-project/commit/1de10705594c7a2c9b8fde901c391bd84062ae04.diff

LOG: [DAGCombine] Fix alias analysis for unaligned accesses

The alias analysis in DAGCombine looks at the BaseAlign, the Offset and the
Size of two accesses, and determines that they are known to access different
parts of memory when they sit at different, non-overlapping offsets within
that "alignment window". It does not account for accesses whose offset is not
a multiple of the access size, which can overflow from one alignment window
into the next.
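
To make the window check concrete, here is a minimal standalone sketch of the
pre-fix logic (an illustration only, not the DAGCombiner code itself; the
helper name knownDisjointByAlignment is made up):

  #include <cstdint>

  // Pre-fix idea: two same-sized accesses with the same base alignment are
  // treated as disjoint if they occupy non-overlapping byte ranges within a
  // single "alignment window" of BaseAlign bytes. This is only sound when
  // neither access can spill over into the next window.
  static bool knownDisjointByAlignment(int64_t Off0, int64_t Off1,
                                       int64_t Size, int64_t BaseAlign) {
    if (Off0 == Off1 || BaseAlign <= Size)
      return false;                        // check does not apply
    int64_t OffAlign0 = Off0 % BaseAlign;  // position inside the window
    int64_t OffAlign1 = Off1 % BaseAlign;
    return (OffAlign0 + Size) <= OffAlign1 ||
           (OffAlign1 + Size) <= OffAlign0;
  }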

For example, in the test case we have a 19-byte memset that is split into a
16-byte NEON store and an unaligned 4-byte store at a 15-byte offset. This
15-byte offset (with a base alignment of 8) wraps around into the next
alignment window. When compared against an access at a 16-byte offset (with
the same 4-byte size and 8-byte base alignment), the two accesses are
incorrectly reported as not aliasing, even though their byte ranges overlap.
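
Plugging the numbers from the test case into that check shows the false
"no alias" result:

  Off0 = 15, Off1 = 16, Size = 4, BaseAlign = 8
  OffAlign0 = 15 % 8 = 7
  OffAlign1 = 16 % 8 = 0
  OffAlign1 + Size = 4 <= 7 = OffAlign0   =>  reported as "no alias"
  but the byte ranges [15, 19) and [16, 20) overlap on bytes 16-18.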

I've fixed this here by requiring that the offsets are a multiple of the
access size, which ensures that the accesses cannot overlap by wrapping into
the next alignment window. Fixes PR45035, which was exposed by the UseAA
changes in the Arm backend.
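
Applied to the sketch above, the fix amounts to an extra guard before the
window comparison (again only an illustration of the idea, not the exact
committed code):

  // Only trust the window check when both offsets are multiples of the
  // access size, so neither access can wrap into the next window.
  if (Off0 % Size != 0 || Off1 % Size != 0)
    return false;  // e.g. 15 % 4 == 3, so the test case is no longer
                   // reported as "no alias" and stays conservative.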

Differential Revision: https://reviews.llvm.org/D75238

Added: 
    llvm/test/CodeGen/ARM/memset-align.ll

Modified: 
    llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Removed: 
    


################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index e2d1c3c9b1be..f95519b8eba5 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -21107,21 +21107,24 @@ bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const {
   // If we know required SrcValue1 and SrcValue2 have relatively large
   // alignment compared to the size and offset of the access, we may be able
   // to prove they do not alias. This check is conservative for now to catch
-  // cases created by splitting vector types.
+  // cases created by splitting vector types, it only works when the offsets are
+  // multiples of the size of the data.
   int64_t SrcValOffset0 = MUC0.MMO->getOffset();
   int64_t SrcValOffset1 = MUC1.MMO->getOffset();
   unsigned OrigAlignment0 = MUC0.MMO->getBaseAlignment();
   unsigned OrigAlignment1 = MUC1.MMO->getBaseAlignment();
+  auto &Size0 = MUC0.NumBytes;
+  auto &Size1 = MUC1.NumBytes;
   if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
-      MUC0.NumBytes.hasValue() && MUC1.NumBytes.hasValue() &&
-      *MUC0.NumBytes == *MUC1.NumBytes && OrigAlignment0 > *MUC0.NumBytes) {
+      Size0.hasValue() && Size1.hasValue() && *Size0 == *Size1 &&
+      OrigAlignment0 > *Size0 && SrcValOffset0 % *Size0 == 0 &&
+      SrcValOffset1 % *Size1 == 0) {
     int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0;
     int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1;
 
     // There is no overlap between these relatively aligned accesses of
     // similar size. Return no alias.
-    if ((OffAlign0 + *MUC0.NumBytes) <= OffAlign1 ||
-        (OffAlign1 + *MUC1.NumBytes) <= OffAlign0)
+    if ((OffAlign0 + *Size0) <= OffAlign1 || (OffAlign1 + *Size1) <= OffAlign0)
       return false;
   }
 
@@ -21134,11 +21137,12 @@ bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const {
     UseAA = false;
 #endif
 
-  if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue()) {
+  if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() &&
+      Size0.hasValue() && Size1.hasValue()) {
     // Use alias analysis information.
     int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
-    int64_t Overlap0 = *MUC0.NumBytes + SrcValOffset0 - MinOffset;
-    int64_t Overlap1 = *MUC1.NumBytes + SrcValOffset1 - MinOffset;
+    int64_t Overlap0 = *Size0 + SrcValOffset0 - MinOffset;
+    int64_t Overlap1 = *Size1 + SrcValOffset1 - MinOffset;
     AliasResult AAResult = AA->alias(
         MemoryLocation(MUC0.MMO->getValue(), Overlap0,
                        UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),

diff --git a/llvm/test/CodeGen/ARM/memset-align.ll b/llvm/test/CodeGen/ARM/memset-align.ll
new file mode 100644
index 000000000000..b8f42f8de9cf
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/memset-align.ll
@@ -0,0 +1,39 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=thumbv8-unknown-linux-android -o - | FileCheck %s
+
+%struct.af = type <{ i64, i64, i8, i8, i8, [5 x i8] }>
+
+define void @test() {
+; CHECK-LABEL: test:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r7, lr}
+; CHECK-NEXT:    push {r7, lr}
+; CHECK-NEXT:    .pad #24
+; CHECK-NEXT:    sub sp, #24
+; CHECK-NEXT:    mov r0, sp
+; CHECK-NEXT:    mov.w r1, #-1
+; CHECK-NEXT:    vmov.i32 q8, #0x0
+; CHECK-NEXT:    movs r2, #15
+; CHECK-NEXT:    mov r3, r0
+; CHECK-NEXT:    strd r1, r1, [sp, #8]
+; CHECK-NEXT:    strd r1, r1, [sp]
+; CHECK-NEXT:    str r1, [sp, #16]
+; CHECK-NEXT:    vst1.64 {d16, d17}, [r3], r2
+; CHECK-NEXT:    movs r2, #0
+; CHECK-NEXT:    str r2, [r3]
+; CHECK-NEXT:    str r1, [sp, #20]
+; CHECK-NEXT:    bl callee
+; CHECK-NEXT:    add sp, #24
+; CHECK-NEXT:    pop {r7, pc}
+entry:
+  %a = alloca %struct.af, align 8
+  %0 = bitcast %struct.af* %a to i8*
+  %1 = bitcast %struct.af* %a to i8*
+  call void @llvm.memset.p0i8.i64(i8* align 8 %1, i8 -1, i64 24, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* align 8 %0, i8 0, i64 19, i1 false)
+  call void @callee(%struct.af* %a)
+  ret void
+}
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg)
+declare void @callee(%struct.af*) local_unnamed_addr #1

