[llvm] ca051a4 - InstCombineCalls: infer return alignment from allocalign attributes
Augie Fackler via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 7 09:38:51 PDT 2022
Author: Augie Fackler
Date: 2022-04-07T12:38:44-04:00
New Revision: ca051a46fbba869d3ac2c76509623a13ede70f5e
URL: https://github.com/llvm/llvm-project/commit/ca051a46fbba869d3ac2c76509623a13ede70f5e
DIFF: https://github.com/llvm/llvm-project/commit/ca051a46fbba869d3ac2c76509623a13ede70f5e.diff
LOG: InstCombineCalls: infer return alignment from allocalign attributes
This exposes a couple of lingering bugs, which will be fixed in
the next two commits.
Differential Revision: https://reviews.llvm.org/D123052
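
For readers skimming the diff below: with this change, InstCombine widens a
call's return alignment when a call-site `allocalign` parameter is a constant
power of two. A minimal before/after sketch in LLVM IR (the allocator name
here is hypothetical and not part of the commit):

    ; Before: the call site promises only align 16, but allocalign marks
    ; the constant 64 as the requested alignment.
    %p = call align 16 i8* @hypothetical_alloc(i32 noundef 320, i32 allocalign noundef 64)

    ; After InstCombine: the return alignment is widened to 64.
    %p = call align 64 i8* @hypothetical_alloc(i32 noundef 320, i32 allocalign noundef 64)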
Added:
llvm/test/Transforms/InstCombine/InferAlignAttribute.ll
Modified:
llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index c8f208d2da0d4..cf44ee057783b 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -2813,17 +2813,19 @@ void InstCombinerImpl::annotateAnyAllocSite(CallBase &Call, const TargetLibraryI
// of some allocation functions) are expected to be handled via annotation
// of the respective allocator declaration with generic attributes.
- uint64_t Size;
- ObjectSizeOpts Opts;
- if (getObjectSize(&Call, Size, DL, TLI, Opts) && Size > 0) {
- // TODO: We really should just emit deref_or_null here and then
- // let the generic inference code combine that with nonnull.
- if (Call.hasRetAttr(Attribute::NonNull))
- Call.addRetAttr(Attribute::getWithDereferenceableBytes(
- Call.getContext(), Size));
- else
- Call.addRetAttr(Attribute::getWithDereferenceableOrNullBytes(
- Call.getContext(), Size));
+ if (isAllocationFn(&Call, TLI)) {
+ uint64_t Size;
+ ObjectSizeOpts Opts;
+ if (getObjectSize(&Call, Size, DL, TLI, Opts) && Size > 0) {
+ // TODO: We really should just emit deref_or_null here and then
+ // let the generic inference code combine that with nonnull.
+ if (Call.hasRetAttr(Attribute::NonNull))
+ Call.addRetAttr(
+ Attribute::getWithDereferenceableBytes(Call.getContext(), Size));
+ else
+ Call.addRetAttr(Attribute::getWithDereferenceableOrNullBytes(
+ Call.getContext(), Size));
+ }
}
// Add alignment attribute if alignment is a power of two constant.
@@ -2844,8 +2846,7 @@ void InstCombinerImpl::annotateAnyAllocSite(CallBase &Call, const TargetLibraryI
/// Improvements for call, callbr and invoke instructions.
Instruction *InstCombinerImpl::visitCallBase(CallBase &Call) {
- if (isAllocationFn(&Call, &TLI))
- annotateAnyAllocSite(Call, &TLI);
+ annotateAnyAllocSite(Call, &TLI);
bool Changed = false;
diff --git a/llvm/test/Transforms/InstCombine/InferAlignAttribute.ll b/llvm/test/Transforms/InstCombine/InferAlignAttribute.ll
new file mode 100644
index 0000000000000..bdf00bd4ed0d8
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/InferAlignAttribute.ll
@@ -0,0 +1,70 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -inline -instcombine %s | FileCheck %s --check-prefixes=CHECK,CHECK-INLINE
+; RUN: opt -S -instcombine %s | FileCheck %s --check-prefixes=CHECK,CHECK-NOINLINE
+
+define i8* @widen_align_from_allocalign_callsite() {
+; CHECK-LABEL: @widen_align_from_allocalign_callsite(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CALL:%.*]] = tail call align 64 i8* @my_aligned_alloc_2(i32 noundef 320, i32 allocalign noundef 64)
+; CHECK-NEXT: ret i8* [[CALL]]
+;
+entry:
+
+ %call = tail call align 16 i8* @my_aligned_alloc_2(i32 noundef 320, i32 allocalign noundef 64)
+ ret i8* %call
+}
+
+; BUG: we don't check the declaration, only the callsite. This will be fixed in the next change.
+define i8* @widen_align_from_allocalign() {
+; CHECK-LABEL: @widen_align_from_allocalign(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CALL:%.*]] = tail call align 16 i8* @my_aligned_alloc(i32 noundef 320, i32 noundef 64)
+; CHECK-NEXT: ret i8* [[CALL]]
+;
+entry:
+
+ %call = tail call align 16 i8* @my_aligned_alloc(i32 noundef 320, i32 noundef 64)
+ ret i8* %call
+}
+
+define i8* @dont_narrow_align_from_allocalign() {
+; CHECK-LABEL: @dont_narrow_align_from_allocalign(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CALL:%.*]] = tail call align 16 i8* @my_aligned_alloc(i32 noundef 320, i32 noundef 8)
+; CHECK-NEXT: ret i8* [[CALL]]
+;
+entry:
+ %call = tail call align 16 i8* @my_aligned_alloc(i32 noundef 320, i32 noundef 8)
+ ret i8* %call
+}
+
+define i8* @my_aligned_alloc_3(i32 noundef %foo, i32 allocalign %alignment) {
+; CHECK-LABEL: @my_aligned_alloc_3(
+; CHECK-NEXT: [[CALL:%.*]] = tail call i8* @my_aligned_alloc_2(i32 noundef [[FOO:%.*]], i32 noundef [[ALIGNMENT:%.*]])
+; CHECK-NEXT: ret i8* [[CALL]]
+;
+ %call = tail call i8* @my_aligned_alloc_2(i32 noundef %foo, i32 noundef %alignment)
+ ret i8* %call
+}
+
+; -inline is able to make my_aligned_alloc_3's arguments disappear and directly
+; call my_aligned_alloc_2, but the latter has no allocalign so the alignment just
+; disappears. This is conservatively correct but undesirable because we can't
+; figure out the `align 128` on the return value once the call is directly on
+; my_aligned_alloc_2. Note that this is a simplified version of what happens
+; with _mm_malloc which calls posix_memalign.
+define i8* @allocalign_disappears() {
+; CHECK-INLINE-LABEL: @allocalign_disappears(
+; CHECK-INLINE-NEXT: [[CALL_I:%.*]] = tail call i8* @my_aligned_alloc_2(i32 noundef 42, i32 noundef 128)
+; CHECK-INLINE-NEXT: ret i8* [[CALL_I]]
+;
+; CHECK-NOINLINE-LABEL: @allocalign_disappears(
+; CHECK-NOINLINE-NEXT: [[CALL:%.*]] = tail call i8* @my_aligned_alloc_3(i32 42, i32 128)
+; CHECK-NOINLINE-NEXT: ret i8* [[CALL]]
+;
+ %call = tail call i8* @my_aligned_alloc_3(i32 42, i32 128)
+ ret i8* %call
+}
+
+declare i8* @my_aligned_alloc(i32 noundef, i32 allocalign noundef)
+declare i8* @my_aligned_alloc_2(i32 noundef, i32 noundef)
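
As the BUG comment in widen_align_from_allocalign notes, only call-site
attributes are consulted at this point; the `allocalign` on
@my_aligned_alloc's declaration is not. Once the follow-up fix lands, one
would expect that test's output to widen as well, roughly as sketched here
(expected result, not output produced by this commit):

    ; Expected after the follow-up fix: the declaration's allocalign on the
    ; second parameter lets InstCombine widen the return alignment to 64
    ; even though the call site itself carries no allocalign.
    %call = tail call align 64 i8* @my_aligned_alloc(i32 noundef 320, i32 noundef 64)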