[llvm] Add the "initializes" attribute inference (PR #97373)

Mon Jul 1 18:58:09 PDT 2024

llvmbot wrote:



@llvm/pr-subscribers-llvm-analysis

@llvm/pr-subscribers-coroutines

Author: Haopeng Liu (haopliu)

<details>
<summary>Changes</summary>

Add the "initializes" attribute inference.

This change is expected to have ~0.09% compile time regression, which seems acceptable for interprocedural DSE.
https://llvm-compile-time-tracker.com/compare.php?from=9f10252c4ad7cffbbcf692fa9c953698f82ac4f5&to=56345c1cee4375eb5c28b8e7abf4803d20216b3b&stat=instructions%3Au

---

Patch is 89.07 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/97373.diff


15 Files Affected:

- (modified) llvm/lib/Transforms/IPO/FunctionAttrs.cpp (+331-3) 
- (modified) llvm/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/amdgpu-libcall-sincos-pass-ordering.ll (+1-1) 
- (modified) llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.ll (+53-53) 
- (modified) llvm/test/CodeGen/BPF/preserve-static-offset/store-zero.ll (+1-1) 
- (modified) llvm/test/Other/optimize-inrange-gep.ll (+1-1) 
- (modified) llvm/test/Transforms/Coroutines/coro-async.ll (+3-3) 
- (modified) llvm/test/Transforms/FunctionAttrs/argmemonly.ll (+5-5) 
- (added) llvm/test/Transforms/FunctionAttrs/initializes.ll (+472) 
- (modified) llvm/test/Transforms/FunctionAttrs/nocapture.ll (+1-1) 
- (modified) llvm/test/Transforms/FunctionAttrs/readattrs.ll (+2-2) 
- (modified) llvm/test/Transforms/FunctionAttrs/writeonly.ll (+3-3) 
- (modified) llvm/test/Transforms/PGOProfile/memprof_internal_linkage.ll (+3-2) 
- (modified) llvm/test/Transforms/PhaseOrdering/X86/unroll-vectorizer.ll (+1-1) 
- (modified) llvm/test/Transforms/PhaseOrdering/pr95152.ll (+3-3) 


``````````diff

diff --git a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
index 7b419d0f098b5..507dbf4ef26f0 100644
--- a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -15,6 +15,7 @@
 #include "llvm/Transforms/IPO/FunctionAttrs.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PostOrderIterator.h"
 #include "llvm/ADT/SCCIterator.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
@@ -36,6 +37,7 @@
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Constant.h"
+#include "llvm/IR/ConstantRangeList.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/InstIterator.h"
@@ -580,6 +582,205 @@ struct ArgumentUsesTracker : public CaptureTracker {
   const SCCNodeSet &SCCNodes;
 };
 
+struct ArgumentUse {
+  Use *U;
+  std::optional<int64_t> Offset;
+};
+
+// A struct of argument access info. "Unknown" accesses are the cases like
+// unrecognized instructions, instructions that have more than one use of
+// the argument, or volatile memory accesses. "Unknown" implies "IsClobber"
+// and an empty access range.
+// Write or Read accesses can be clobbers as well for example, a Load with
+// scalable type.
+struct ArgumentAccessInfo {
+  enum AccessType { Write, Read, Unknown };
+  AccessType ArgAccessType;
+  ConstantRangeList AccessRanges;
+  bool IsClobber = false;
+};
+
+struct UsesPerBlockInfo {
+  DenseMap<Instruction *, ArgumentAccessInfo> Insts;
+  bool HasWrites;
+  bool HasClobber;
+};
+
+ArgumentAccessInfo GetArgmentAccessInfo(const Instruction *I,
+                                        const ArgumentUse &IU,
+                                        const DataLayout &DL) {
+  auto GetTypeAccessRange =
+      [&DL](Type *Ty,
+            std::optional<int64_t> Offset) -> std::optional<ConstantRange> {
+    auto TypeSize = DL.getTypeStoreSize(Ty);
+    if (!TypeSize.isScalable() && Offset.has_value()) {
+      int64_t Size = TypeSize.getFixedValue();
+      return ConstantRange(APInt(64, Offset.value(), true),
+                           APInt(64, Offset.value() + Size, true));
+    }
+    return std::nullopt;
+  };
+  auto GetConstantIntRange =
+      [](Value *Length,
+         std::optional<int64_t> Offset) -> std::optional<ConstantRange> {
+    auto *ConstantLength = dyn_cast<ConstantInt>(Length);
+    if (ConstantLength && Offset.has_value()) {
+      return ConstantRange(
+          APInt(64, Offset.value(), true),
+          APInt(64, Offset.value() + ConstantLength->getSExtValue(), true));
+    }
+    return std::nullopt;
+  };
+  if (auto *SI = dyn_cast<StoreInst>(I)) {
+    if (&SI->getOperandUse(1) == IU.U) {
+      // Get the fixed type size of "SI". Since the access range of a write
+      // will be unioned, if "SI" doesn't have a fixed type size, we just set
+      // the access range to empty.
+      ConstantRangeList AccessRanges;
+      auto TypeAccessRange = GetTypeAccessRange(SI->getAccessType(), IU.Offset);
+      if (TypeAccessRange.has_value())
+        AccessRanges.insert(TypeAccessRange.value());
+      return {ArgumentAccessInfo::AccessType::Write, AccessRanges,
+              /*IsClobber=*/false};
+    }
+  } else if (auto *LI = dyn_cast<LoadInst>(I)) {
+    if (&LI->getOperandUse(0) == IU.U) {
+      // Get the fixed type size of "LI". Different from Write, if "LI"
+      // doesn't have a fixed type size, we conservatively set as a clobber
+      // with an empty access range.
+      auto TypeAccessRange = GetTypeAccessRange(LI->getAccessType(), IU.Offset);
+      if (TypeAccessRange.has_value())
+        return {ArgumentAccessInfo::AccessType::Read,
+                {TypeAccessRange.value()},
+                /*IsClobber=*/false};
+      else
+        return {ArgumentAccessInfo::AccessType::Read, {}, /*IsClobber=*/true};
+    }
+  } else if (auto *MemSet = dyn_cast<MemSetInst>(I)) {
+    if (!MemSet->isVolatile()) {
+      ConstantRangeList AccessRanges;
+      auto AccessRange = GetConstantIntRange(MemSet->getLength(), IU.Offset);
+      if (AccessRange.has_value())
+        AccessRanges.insert(AccessRange.value());
+      return {ArgumentAccessInfo::AccessType::Write, AccessRanges,
+              /*IsClobber=*/false};
+    }
+  } else if (auto *MemCpy = dyn_cast<MemCpyInst>(I)) {
+    if (!MemCpy->isVolatile()) {
+      if (&MemCpy->getOperandUse(0) == IU.U) {
+        ConstantRangeList AccessRanges;
+        auto AccessRange = GetConstantIntRange(MemCpy->getLength(), IU.Offset);
+        if (AccessRange.has_value())
+          AccessRanges.insert(AccessRange.value());
+        return {ArgumentAccessInfo::AccessType::Write, AccessRanges,
+                /*IsClobber=*/false};
+      } else if (&MemCpy->getOperandUse(1) == IU.U) {
+        auto AccessRange = GetConstantIntRange(MemCpy->getLength(), IU.Offset);
+        if (AccessRange.has_value())
+          return {ArgumentAccessInfo::AccessType::Read,
+                  {AccessRange.value()},
+                  /*IsClobber=*/false};
+        else
+          return {ArgumentAccessInfo::AccessType::Read, {}, /*IsClobber=*/true};
+      }
+    }
+  } else if (auto *CB = dyn_cast<CallBase>(I)) {
+    if (CB->isArgOperand(IU.U)) {
+      unsigned ArgNo = CB->getArgOperandNo(IU.U);
+      bool IsInitialize = CB->paramHasAttr(ArgNo, Attribute::Initializes);
+      // Argument is only not clobbered when parameter is writeonly/readnone
+      // and nocapture.
+      bool IsClobber = !(CB->onlyWritesMemory(ArgNo) &&
+                         CB->paramHasAttr(ArgNo, Attribute::NoCapture));
+      ConstantRangeList AccessRanges;
+      if (IsInitialize && IU.Offset.has_value()) {
+        Attribute Attr = CB->getParamAttr(ArgNo, Attribute::Initializes);
+        if (!Attr.isValid()) {
+          Attr = CB->getCalledFunction()->getParamAttribute(
+              ArgNo, Attribute::Initializes);
+        }
+        ConstantRangeList CBCRL = Attr.getValueAsConstantRangeList();
+        for (ConstantRange &CR : CBCRL) {
+          AccessRanges.insert(ConstantRange(CR.getLower() + IU.Offset.value(),
+                                            CR.getUpper() + IU.Offset.value()));
+        }
+        return {ArgumentAccessInfo::AccessType::Write, AccessRanges, IsClobber};
+      }
+    }
+  }
+  // Unrecognized instructions are considered clobbers.
+  return {ArgumentAccessInfo::AccessType::Unknown, {}, /*IsClobber=*/true};
+}
+
+std::pair<bool, bool> CollectArgumentUsesPerBlock(
+    Argument &A, Function &F,
+    DenseMap<const BasicBlock *, UsesPerBlockInfo> &UsesPerBlock) {
+  auto &DL = F.getParent()->getDataLayout();
+  auto PointerSize =
+      DL.getIndexSizeInBits(A.getType()->getPointerAddressSpace());
+
+  bool HasAnyWrite = false;
+  bool HasWriteOutsideEntryBB = false;
+
+  BasicBlock &EntryBB = F.getEntryBlock();
+  SmallVector<ArgumentUse, 4> Worklist;
+  for (Use &U : A.uses())
+    Worklist.push_back({&U, 0});
+
+  auto UpdateUseInfo = [&UsesPerBlock](Instruction *I,
+                                       ArgumentAccessInfo Info) {
+    auto *BB = I->getParent();
+    auto &BBInfo = UsesPerBlock.getOrInsertDefault(BB);
+    bool AlreadyVisitedInst = BBInfo.Insts.contains(I);
+    auto &IInfo = BBInfo.Insts[I];
+
+    // Instructions that have more than one use of the argument are considered
+    // as clobbers.
+    if (AlreadyVisitedInst) {
+      IInfo = {ArgumentAccessInfo::AccessType::Unknown, {}, true};
+      BBInfo.HasClobber = true;
+      return false;
+    }
+
+    IInfo = Info;
+    BBInfo.HasClobber |= IInfo.IsClobber;
+    BBInfo.HasWrites |=
+        (IInfo.ArgAccessType == ArgumentAccessInfo::AccessType::Write &&
+         !IInfo.AccessRanges.empty());
+    return !IInfo.AccessRanges.empty();
+  };
+
+  // No need for a visited set because we don't look through phis, so there are
+  // no cycles.
+  while (!Worklist.empty()) {
+    ArgumentUse IU = Worklist.pop_back_val();
+    User *U = IU.U->getUser();
+    // Add GEP uses to worklist.
+    // If the GEP is not a constant GEP, set IsInitialize to false.
+    if (auto *GEP = dyn_cast<GEPOperator>(U)) {
+      APInt Offset(PointerSize, 0, /*isSigned=*/true);
+      bool IsConstGEP = GEP->accumulateConstantOffset(DL, Offset);
+      std::optional<int64_t> NewOffset = std::nullopt;
+      if (IsConstGEP && IU.Offset.has_value()) {
+        NewOffset = *IU.Offset + Offset.getSExtValue();
+      }
+      for (Use &U : GEP->uses())
+        Worklist.push_back({&U, NewOffset});
+      continue;
+    }
+
+    auto *I = cast<Instruction>(U);
+    bool HasWrite = UpdateUseInfo(I, GetArgmentAccessInfo(I, IU, DL));
+
+    HasAnyWrite |= HasWrite;
+
+    if (HasWrite && I->getParent() != &EntryBB) {
+      HasWriteOutsideEntryBB = true;
+    }
+  }
+  return {HasAnyWrite, HasWriteOutsideEntryBB};
+}
+
 } // end anonymous namespace
 
 namespace llvm {
@@ -866,9 +1067,132 @@ static bool addAccessAttr(Argument *A, Attribute::AttrKind R) {
   return true;
 }
 
+static bool inferInitializes(Argument &A, Function &F) {
+  DenseMap<const BasicBlock *, UsesPerBlockInfo> UsesPerBlock;
+  auto [HasAnyWrite, HasWriteOutsideEntryBB] =
+      CollectArgumentUsesPerBlock(A, F, UsesPerBlock);
+  // No write anywhere in the function, bail.
+  if (!HasAnyWrite)
+    return false;
+
+  BasicBlock &EntryBB = F.getEntryBlock();
+  DenseMap<const BasicBlock *, ConstantRangeList> Initialized;
+  auto VisitBlock = [&](const BasicBlock *BB) -> ConstantRangeList {
+    auto UPB = UsesPerBlock.find(BB);
+
+    // If this block has uses and none are writes, the argument is not
+    // initialized in this block.
+    if (UPB != UsesPerBlock.end() && !UPB->second.HasWrites)
+      return ConstantRangeList();
+
+    ConstantRangeList CRL;
+
+    // Start with intersection of successors.
+    // If this block has any clobbering use, we're going to clear out the
+    // ranges at some point in this block anyway, so don't bother looking at
+    // successors.
+    if (UPB == UsesPerBlock.end() || !UPB->second.HasClobber) {
+      bool HasAddedSuccessor = false;
+      for (auto *Succ : successors(BB)) {
+        if (auto SuccI = Initialized.find(Succ); SuccI != Initialized.end()) {
+          if (HasAddedSuccessor) {
+            CRL = CRL.intersectWith(SuccI->second);
+          } else {
+            CRL = SuccI->second;
+            HasAddedSuccessor = true;
+          }
+        } else {
+          CRL = ConstantRangeList();
+          break;
+        }
+      }
+    }
+
+    if (UPB != UsesPerBlock.end()) {
+      // Sort uses in this block by instruction order.
+      SmallVector<std::pair<Instruction *, ArgumentAccessInfo>, 2> Insts;
+      append_range(Insts, UPB->second.Insts);
+      sort(Insts, [](std::pair<Instruction *, ArgumentAccessInfo> &LHS,
+                     std::pair<Instruction *, ArgumentAccessInfo> &RHS) {
+        return LHS.first->comesBefore(RHS.first);
+      });
+
+      // From the end of the block to the beginning of the block, set
+      // initializes ranges.
+      for (auto [_, Info] : reverse(Insts)) {
+        if (Info.IsClobber) {
+          CRL = ConstantRangeList();
+        }
+        if (!Info.AccessRanges.empty()) {
+          if (Info.ArgAccessType == ArgumentAccessInfo::AccessType::Write) {
+            CRL = CRL.unionWith(Info.AccessRanges);
+          } else {
+            assert(Info.ArgAccessType == ArgumentAccessInfo::AccessType::Read);
+            for (const auto &ReadRange : Info.AccessRanges) {
+              CRL.subtract(ReadRange);
+            }
+          }
+        }
+      }
+    }
+    return CRL;
+  };
+
+  ConstantRangeList EntryCRL;
+  // If all write instructions are in the EntryBB, or if the EntryBB has
+  // a clobbering use, we only need to look at EntryBB.
+  bool OnlyScanEntryBlock = !HasWriteOutsideEntryBB;
+  if (!OnlyScanEntryBlock) {
+    if (auto EntryUPB = UsesPerBlock.find(&EntryBB);
+        EntryUPB != UsesPerBlock.end()) {
+      OnlyScanEntryBlock = EntryUPB->second.HasClobber;
+    }
+  }
+  if (OnlyScanEntryBlock) {
+    EntryCRL = VisitBlock(&EntryBB);
+    if (EntryCRL.empty()) {
+      return false;
+    }
+  } else {
+    // Visit successors before predecessors with a post-order walk of the
+    // blocks.
+    for (const BasicBlock *BB : post_order(&F)) {
+      ConstantRangeList CRL = VisitBlock(BB);
+      if (!CRL.empty()) {
+        Initialized[BB] = CRL;
+      }
+    }
+
+    auto EntryCRLI = Initialized.find(&EntryBB);
+    if (EntryCRLI == Initialized.end()) {
+      return false;
+    }
+
+    EntryCRL = EntryCRLI->second;
+  }
+
+  assert(!EntryCRL.empty() &&
+         "should have bailed already if EntryCRL is empty");
+
+  if (A.hasAttribute(Attribute::Initializes)) {
+    ConstantRangeList PreviousCRL =
+        A.getAttribute(Attribute::Initializes).getValueAsConstantRangeList();
+    if (PreviousCRL == EntryCRL) {
+      return false;
+    }
+    EntryCRL = EntryCRL.unionWith(PreviousCRL);
+  }
+
+  A.addAttr(Attribute::get(A.getContext(), Attribute::Initializes,
+                           EntryCRL.rangesRef()));
+
+  return true;
+}
+
 /// Deduce nocapture attributes for the SCC.
 static void addArgumentAttrs(const SCCNodeSet &SCCNodes,
-                             SmallSet<Function *, 8> &Changed) {
+                             SmallSet<Function *, 8> &Changed,
+                             bool SkipInitializes) {
   ArgumentGraph AG;
 
   // Check each function in turn, determining which pointer arguments are not
@@ -936,6 +1260,10 @@ static void addArgumentAttrs(const SCCNodeSet &SCCNodes,
           if (addAccessAttr(&A, R))
             Changed.insert(F);
       }
+      if (!SkipInitializes && !A.onlyReadsMemory()) {
+        if (inferInitializes(A, *F))
+          Changed.insert(F);
+      }
     }
   }
 
@@ -1844,13 +2172,13 @@ deriveAttrsInPostOrder(ArrayRef<Function *> Functions, AARGetterT &&AARGetter,
 
   SmallSet<Function *, 8> Changed;
   if (ArgAttrsOnly) {
-    addArgumentAttrs(Nodes.SCCNodes, Changed);
+    addArgumentAttrs(Nodes.SCCNodes, Changed, /*SkipInitializes=*/true);
     return Changed;
   }
 
   addArgumentReturnedAttrs(Nodes.SCCNodes, Changed);
   addMemoryAttrs(Nodes.SCCNodes, AARGetter, Changed);
-  addArgumentAttrs(Nodes.SCCNodes, Changed);
+  addArgumentAttrs(Nodes.SCCNodes, Changed, /*SkipInitializes=*/false);
   inferConvergent(Nodes.SCCNodes, Changed);
   addNoReturnAttrs(Nodes.SCCNodes, Changed);
   addWillReturn(Nodes.SCCNodes, Changed);
diff --git a/llvm/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll b/llvm/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll
index bea56a72bdeae..8615363a985d1 100644
--- a/llvm/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll
+++ b/llvm/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll
@@ -15,7 +15,7 @@ define void @test0_yes(ptr %p) nounwind {
   ret void
 }
 
-; CHECK: define void @test0_no(ptr nocapture writeonly %p) #1 {
+; CHECK: define void @test0_no(ptr nocapture writeonly initializes((0, 4)) %p) #1 {
 define void @test0_no(ptr %p) nounwind {
   store i32 0, ptr %p, !tbaa !2
   ret void
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-libcall-sincos-pass-ordering.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-libcall-sincos-pass-ordering.ll
index 6b835bb4eef66..317a069eed26e 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-libcall-sincos-pass-ordering.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-libcall-sincos-pass-ordering.ll
@@ -10,7 +10,7 @@
 ; Should have call to sincos declarations, not calls to the asm pseudo-libcalls
 define protected amdgpu_kernel void @swdev456865(ptr addrspace(1) %out0, ptr addrspace(1) %out1, ptr addrspace(1) %out2, float noundef %x) #0 {
 ; CHECK-LABEL: define protected amdgpu_kernel void @swdev456865(
-; CHECK-SAME: ptr addrspace(1) nocapture writeonly [[OUT0:%.*]], ptr addrspace(1) nocapture writeonly [[OUT1:%.*]], ptr addrspace(1) nocapture writeonly [[OUT2:%.*]], float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-SAME: ptr addrspace(1) nocapture writeonly initializes((0, 8)) [[OUT0:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 8)) [[OUT1:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 8)) [[OUT2:%.*]], float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5)
 ; CHECK-NEXT:    [[I_I:%.*]] = call float @_Z6sincosfPU3AS5f(float [[X]], ptr addrspace(5) [[__SINCOS_]]) #[[ATTR1:[0-9]+]]
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.ll
index a35fbaadddf9e..619124affff81 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.ll
@@ -49,7 +49,7 @@ declare float @_Z6sincosfPU3AS0f(float %x, ptr writeonly %ptr) #1
 
 define void @sincos_f16_nocontract(half %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) {
 ; CHECK-LABEL: define void @sincos_f16_nocontract
-; CHECK-SAME: (half [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {
+; CHECK-SAME: (half [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 2)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 2)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CALL:%.*]] = tail call half @_Z3sinDh(half [[X]])
 ; CHECK-NEXT:    store half [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 2
@@ -68,7 +68,7 @@ entry:
 
 define void @sincos_v2f16_nocontract(<2 x half> %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) {
 ; CHECK-LABEL: define void @sincos_v2f16_nocontract
-; CHECK-SAME: (<2 x half> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-SAME: (<2 x half> [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 4)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CALL:%.*]] = tail call <2 x half> @_Z3sinDv2_Dh(<2 x half> [[X]])
 ; CHECK-NEXT:    store <2 x half> [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 4
@@ -87,7 +87,7 @@ entry:
 
 define void @sincos_f16(half %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) {
 ; CHECK-LABEL: define void @sincos_f16
-; CHECK-SAME: (half [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-SAME: (half [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 2)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 2)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CALL:%.*]] = tail call contract half @_Z3sinDh(half [[X]])
 ; CHECK-NEXT:    store half [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 2
@@ -105,7 +105,7 @@ entry:
 
 define void @sincos_f16_order1(half %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) {
 ; CHECK-LABEL: define void @sincos_f16_order1
-; CHECK-SAME: (half [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-SAME: (half [[X:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 2)) [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly initializes((0, 2)) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CALL1:%.*]] = tail call contract half @_Z3cosDh(half [[X]])
 ; CHECK-NEXT:    store half [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 2
@@ -123,7 +123,7 @@ entry:
 
 define void @sincos_v2f16(<2 x half> %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) {
 ; CHECK-LABEL: define void @sincos_v2f16
-; CHECK-SAME: (<2 x half> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-SAME: (<2 x half> [[X:%.*]], p...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/97373