[llvm] [AMDGPU][Attributor] Infer `inreg` attribute in `AMDGPUAttributor` (PR #101609)

Sun Sep 1 14:51:12 PDT 2024

llvmbot wrote:




@llvm/pr-subscribers-backend-amdgpu

Author: Shilei Tian (shiltian)

<details>
<summary>Changes</summary>

This patch introduces `AAAMDGPUInreg` that can infer `inreg` function argument attribute. The idea is, for a function argument, if the corresponding call site arguments are uniform, we can mark it as `inreg` thus pass it via SGPR.

In addition, this AA is also able to propagate the `inreg` attribute if feasible.

---
Full diff: https://github.com/llvm/llvm-project/pull/101609.diff


4 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp (+99-1) 
- (modified) llvm/test/CodeGen/AMDGPU/amdgpu-attributor-accesslist-offsetbins-out-of-sync.ll (+1-1) 
- (added) llvm/test/CodeGen/AMDGPU/inreg-inference.ll (+66) 
- (modified) llvm/test/CodeGen/AMDGPU/remove-no-kernel-id-attribute.ll (+20-19) 


``````````diff

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index 72049f0aa6b86e..32b27aa71cc9ce 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -14,6 +14,7 @@
 #include "GCNSubtarget.h"
 #include "Utils/AMDGPUBaseInfo.h"
 #include "llvm/Analysis/CycleAnalysis.h"
+#include "llvm/Analysis/UniformityAnalysis.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/IR/IntrinsicsAMDGPU.h"
 #include "llvm/IR/IntrinsicsR600.h"
@@ -1014,6 +1015,97 @@ struct AAAMDGPUNoAGPR
 
 const char AAAMDGPUNoAGPR::ID = 0;
 
+struct AAAMDGPUInreg
+    : public IRAttribute<Attribute::InReg,
+                         StateWrapper<BooleanState, AbstractAttribute>,
+                         AAAMDGPUInreg> {
+  AAAMDGPUInreg(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}
+
+  /// Create an abstract attribute view for the position \p IRP.
+  static AAAMDGPUInreg &createForPosition(const IRPosition &IRP, Attributor &A);
+
+  /// See AbstractAttribute::getName()
+  const std::string getName() const override { return "AAAMDGPUInreg"; }
+
+  const std::string getAsStr(Attributor *A) const override {
+    return getAssumed() ? "inreg" : "non-inreg";
+  }
+
+  void trackStatistics() const override {}
+
+  /// See AbstractAttribute::getIdAddr()
+  const char *getIdAddr() const override { return &ID; }
+
+  /// This function should return true if the type of the \p AA is AAAMDGPUInreg
+  static bool classof(const AbstractAttribute *AA) {
+    return (AA->getIdAddr() == &ID);
+  }
+
+  /// Unique ID (due to the unique address)
+  static const char ID;
+};
+
+const char AAAMDGPUInreg::ID = 0;
+
+namespace {
+
+struct AAAMDGPUInregArgument : public AAAMDGPUInreg {
+  AAAMDGPUInregArgument(const IRPosition &IRP, Attributor &A)
+      : AAAMDGPUInreg(IRP, A) {}
+
+  void initialize(Attributor &A) override {
+    if (getAssociatedArgument()->hasAttribute(Attribute::InReg))
+      indicateOptimisticFixpoint();
+  }
+
+  ChangeStatus updateImpl(Attributor &A) override {
+    unsigned ArgNo = getAssociatedArgument()->getArgNo();
+
+    auto Pred = [&](AbstractCallSite ACS) -> bool {
+      CallBase *CB = ACS.getInstruction();
+      Value *V = CB->getArgOperandUse(ArgNo);
+      if (auto *G = dyn_cast<GlobalValue>(V))
+        return true;
+      if (auto *I = dyn_cast<Instruction>(V)) {
+        auto AU = A.getInfoCache()
+                      .getAnalysisResultForFunction<UniformityInfoAnalysis>(
+                          *I->getFunction());
+        return AU && AU->isUniform(I);
+      }
+      if (auto *Arg = dyn_cast<Argument>(V)) {
+        auto *AA =
+            A.getOrCreateAAFor<AAAMDGPUInreg>(IRPosition::argument(*Arg));
+        return AA && AA->isValidState();
+      }
+      // For unforeseen cases, we need to assume it is not uniform thus not
+      // qualified for inreg.
+      return false;
+    };
+
+    bool UsedAssumedInformation = false;
+    if (!A.checkForAllCallSites(Pred, *this, /*RequireAllCallSites=*/true,
+                                UsedAssumedInformation))
+      return indicatePessimisticFixpoint();
+
+    if (!UsedAssumedInformation)
+      return indicateOptimisticFixpoint();
+
+    return ChangeStatus::UNCHANGED;
+  }
+};
+
+} // namespace
+
+AAAMDGPUInreg &AAAMDGPUInreg::createForPosition(const IRPosition &IRP,
+                                                Attributor &A) {
+  switch (IRP.getPositionKind()) {
+  case IRPosition::IRP_ARGUMENT:
+    return *new (A.Allocator) AAAMDGPUInregArgument(IRP, A);
+  default:
+    llvm_unreachable("not a valid position for AAAMDGPUInreg");
+  }
+}
+
 static void addPreloadKernArgHint(Function &F, TargetMachine &TM) {
   const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
   for (unsigned I = 0;
@@ -1046,7 +1138,7 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
        &AAAMDWavesPerEU::ID, &AAAMDGPUNoAGPR::ID, &AACallEdges::ID,
        &AAPointerInfo::ID, &AAPotentialConstantValues::ID,
        &AAUnderlyingObjects::ID, &AAAddressSpace::ID, &AAIndirectCallInfo::ID,
-       &AAInstanceInfo::ID});
+       &AAInstanceInfo::ID, &AAAMDGPUInreg::ID});
 
   AttributorConfig AC(CGUpdater);
   AC.IsClosedWorldModule = Options.IsClosedWorld;
@@ -1090,6 +1182,11 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
             IRPosition::value(*SI->getPointerOperand()));
       }
     }
+
+    if (F.getCallingConv() != CallingConv::AMDGPU_KERNEL) {
+      for (auto &Arg : F.args())
+        A.getOrCreateAAFor<AAAMDGPUInreg>(IRPosition::argument(Arg));
+    }
   }
 
   ChangeStatus Change = A.run();
@@ -1118,6 +1215,7 @@ class AMDGPUAttributorLegacy : public ModulePass {
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.addRequired<CycleInfoWrapperPass>();
+    AU.addRequired<UniformityInfoWrapperPass>();
   }
 
   StringRef getPassName() const override { return "AMDGPU Attributor"; }
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-accesslist-offsetbins-out-of-sync.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-accesslist-offsetbins-out-of-sync.ll
index d58a62408427dc..4f46e08921a49b 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-accesslist-offsetbins-out-of-sync.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-accesslist-offsetbins-out-of-sync.ll
@@ -8,7 +8,7 @@
 
 define internal fastcc void @foo(ptr %kg) {
 ; CHECK-LABEL: define internal fastcc void @foo(
-; CHECK-SAME: ptr [[KG:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-SAME: ptr inreg [[KG:%.*]]) #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    [[CLOSURE_I25_I:%.*]] = getelementptr i8, ptr [[KG]], i64 336
 ; CHECK-NEXT:    [[NUM_CLOSURE_I26_I:%.*]] = getelementptr i8, ptr [[KG]], i64 276
diff --git a/llvm/test/CodeGen/AMDGPU/inreg-inference.ll b/llvm/test/CodeGen/AMDGPU/inreg-inference.ll
new file mode 100644
index 00000000000000..94e5e700a78b14
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/inreg-inference.ll
@@ -0,0 +1,66 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
+; RUN: opt -S -mtriple=amdgcn-unknown-unknown -passes=amdgpu-attributor %s -o - | FileCheck %s
+
+ at g1 = protected addrspace(1) externally_initialized global i32 0, align 4
+ at g2 = protected addrspace(1) externally_initialized global i32 0, align 4
+
+;.
+; CHECK: @g1 = protected addrspace(1) externally_initialized global i32 0, align 4
+; CHECK: @g2 = protected addrspace(1) externally_initialized global i32 0, align 4
+;.
+define internal fastcc void @f(ptr %x, ptr %y) {
+; CHECK-LABEL: define {{[^@]+}}@f
+; CHECK-SAME: (ptr inreg [[X:%.*]], ptr inreg [[Y:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[X_VAL:%.*]] = load i32, ptr [[X]], align 4
+; CHECK-NEXT:    store i32 [[X_VAL]], ptr addrspace(1) @g1, align 4
+; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr [[Y]], align 4
+; CHECK-NEXT:    store i32 [[LOAD]], ptr addrspace(1) @g2, align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %x.val = load i32, ptr %x, align 4
+  store i32 %x.val, ptr addrspace(1) @g1, align 4
+  %load = load i32, ptr %y, align 4
+  store i32 %load, ptr addrspace(1) @g2, align 4
+  ret void
+}
+
+define protected amdgpu_kernel void @kernel(ptr addrspace(1) %x2, i32 %z) {
+; CHECK-LABEL: define {{[^@]+}}@kernel
+; CHECK-SAME: (ptr addrspace(1) [[X2:%.*]], i32 [[Z:%.*]]) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[X2_CAST:%.*]] = addrspacecast ptr addrspace(1) [[X2]] to ptr
+; CHECK-NEXT:    [[QUEUE_PTR:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.queue.ptr()
+; CHECK-NEXT:    [[QUEUE_PTR_CAST:%.*]] = addrspacecast ptr addrspace(4) [[QUEUE_PTR]] to ptr
+; CHECK-NEXT:    [[IMPLICITARG_PTR:%.*]] = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
+; CHECK-NEXT:    [[IMPLICITARG_PTR_CAST:%.*]] = addrspacecast ptr addrspace(4) [[IMPLICITARG_PTR]] to ptr
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[Z]], 0
+; CHECK-NEXT:    [[COND:%.*]] = select i1 [[CMP]], ptr [[QUEUE_PTR_CAST]], ptr [[X2_CAST]]
+; CHECK-NEXT:    tail call fastcc void @f(ptr [[COND]], ptr noundef [[IMPLICITARG_PTR_CAST]])
+; CHECK-NEXT:    [[DOTVAL:%.*]] = load i32, ptr addrspace(4) [[QUEUE_PTR]], align 4
+; CHECK-NEXT:    tail call fastcc void @f(ptr [[COND]], ptr noundef [[IMPLICITARG_PTR_CAST]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %x2.cast = addrspacecast ptr addrspace(1) %x2 to ptr
+  %queue.ptr = tail call ptr addrspace(4) @llvm.amdgcn.queue.ptr()
+  %queue.ptr.cast = addrspacecast ptr addrspace(4) %queue.ptr to ptr
+  %implicitarg.ptr = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
+  %implicitarg.ptr.cast = addrspacecast ptr addrspace(4) %implicitarg.ptr to ptr
+  %cmp = icmp sgt i32 %z, 0
+  %cond = select i1 %cmp, ptr %queue.ptr.cast, ptr %x2.cast
+  tail call fastcc void @f(ptr %cond, ptr noundef %implicitarg.ptr.cast)
+  %.val = load i32, ptr addrspace(4) %queue.ptr, align 4
+  tail call fastcc void @f(ptr %cond, ptr noundef %implicitarg.ptr.cast)
+  ret void
+}
+
+declare align 4 ptr addrspace(4) @llvm.amdgcn.queue.ptr()
+
+declare align 4 ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
+;.
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+;.
diff --git a/llvm/test/CodeGen/AMDGPU/remove-no-kernel-id-attribute.ll b/llvm/test/CodeGen/AMDGPU/remove-no-kernel-id-attribute.ll
index 384a9c4043a1d3..65a6322dd4730a 100644
--- a/llvm/test/CodeGen/AMDGPU/remove-no-kernel-id-attribute.ll
+++ b/llvm/test/CodeGen/AMDGPU/remove-no-kernel-id-attribute.ll
@@ -8,11 +8,11 @@
 @recursive.kernel.lds = addrspace(3) global i16 poison
 
 ;.
-; CHECK: @[[LLVM_AMDGCN_KERNEL_K0_F0_LDS:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [[LLVM_AMDGCN_KERNEL_K0_F0_LDS_T:%.*]] poison, align 2, !absolute_symbol !0
-; CHECK: @[[LLVM_AMDGCN_KERNEL_K1_F0_LDS:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [[LLVM_AMDGCN_KERNEL_K1_F0_LDS_T:%.*]] poison, align 2, !absolute_symbol !0
-; CHECK: @[[LLVM_AMDGCN_KERNEL_KERNEL_LDS_LDS:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [[LLVM_AMDGCN_KERNEL_KERNEL_LDS_LDS_T:%.*]] poison, align 2, !absolute_symbol !0
-; CHECK: @[[LLVM_AMDGCN_KERNEL_KERNEL_LDS_RECURSION_LDS:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [[LLVM_AMDGCN_KERNEL_KERNEL_LDS_RECURSION_LDS_T:%.*]] poison, align 2, !absolute_symbol !0
-; CHECK: @[[LLVM_AMDGCN_LDS_OFFSET_TABLE:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(4) constant [3 x [2 x i32]]
+; CHECK: @llvm.amdgcn.kernel.k0_f0.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k0_f0.lds.t poison, align 2, !absolute_symbol [[META0:![0-9]+]]
+; CHECK: @llvm.amdgcn.kernel.k1_f0.lds = internal addrspace(3) global %llvm.amdgcn.kernel.k1_f0.lds.t poison, align 2, !absolute_symbol [[META0]]
+; CHECK: @llvm.amdgcn.kernel.kernel_lds.lds = internal addrspace(3) global %llvm.amdgcn.kernel.kernel_lds.lds.t poison, align 2, !absolute_symbol [[META0]]
+; CHECK: @llvm.amdgcn.kernel.kernel_lds_recursion.lds = internal addrspace(3) global %llvm.amdgcn.kernel.kernel_lds_recursion.lds.t poison, align 2, !absolute_symbol [[META0]]
+; CHECK: @llvm.amdgcn.lds.offset.table = internal addrspace(4) constant [3 x [2 x i32]] [[2 x i32] [i32 ptrtoint (ptr addrspace(3) @llvm.amdgcn.kernel.k0_f0.lds to i32), i32 poison], [2 x i32] [i32 ptrtoint (ptr addrspace(3) @llvm.amdgcn.kernel.k1_f0.lds to i32), i32 ptrtoint (ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k1_f0.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k1_f0.lds, i32 0, i32 1) to i32)], [2 x i32] [i32 poison, i32 ptrtoint (ptr addrspace(3) @llvm.amdgcn.kernel.kernel_lds_recursion.lds to i32)]]
 ;.
 define internal void @lds_use_through_indirect() {
 ; CHECK-LABEL: define internal void @lds_use_through_indirect(
@@ -105,7 +105,7 @@ define internal void @f0_transitive() {
 
 define amdgpu_kernel void @k0_f0() {
 ; CHECK-LABEL: define amdgpu_kernel void @k0_f0(
-; CHECK-SAME: ) #[[ATTR2:[0-9]+]] !llvm.amdgcn.lds.kernel.id !2 {
+; CHECK-SAME: ) #[[ATTR2:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META2:![0-9]+]] {
 ; CHECK-NEXT:    call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k0_f0.lds) ]
 ; CHECK-NEXT:    call void @f0_transitive()
 ; CHECK-NEXT:    ret void
@@ -116,8 +116,8 @@ define amdgpu_kernel void @k0_f0() {
 
 define amdgpu_kernel void @k1_f0() {
 ; CHECK-LABEL: define amdgpu_kernel void @k1_f0(
-; CHECK-SAME: ) #[[ATTR3:[0-9]+]] !llvm.amdgcn.lds.kernel.id !3 {
-; CHECK-NEXT:    call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k1_f0.lds) ], !alias.scope !4, !noalias !7
+; CHECK-SAME: ) #[[ATTR3:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META3:![0-9]+]] {
+; CHECK-NEXT:    call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k1_f0.lds) ], !alias.scope [[META4:![0-9]+]], !noalias [[META7:![0-9]+]]
 ; CHECK-NEXT:    call void @f0_transitive()
 ; CHECK-NEXT:    [[FPTR:%.*]] = load volatile ptr, ptr addrspace(1) null, align 8
 ; CHECK-NEXT:    call void [[FPTR]]()
@@ -168,7 +168,7 @@ define internal i16 @mutual_recursion_0(i16 %arg) {
 
 define internal void @mutual_recursion_1(i16 %arg) {
 ; CHECK-LABEL: define internal void @mutual_recursion_1(
-; CHECK-SAME: i16 [[ARG:%.*]]) #[[ATTR0]] {
+; CHECK-SAME: i16 inreg [[ARG:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:    call void @mutual_recursion_0(i16 [[ARG]])
 ; CHECK-NEXT:    ret void
 ;
@@ -178,7 +178,7 @@ define internal void @mutual_recursion_1(i16 %arg) {
 
 define amdgpu_kernel void @kernel_lds_recursion() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_lds_recursion(
-; CHECK-SAME: ) #[[ATTR2]] !llvm.amdgcn.lds.kernel.id !9 {
+; CHECK-SAME: ) #[[ATTR2]] !llvm.amdgcn.lds.kernel.id [[META9:![0-9]+]] {
 ; CHECK-NEXT:    call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.kernel_lds_recursion.lds) ]
 ; CHECK-NEXT:    call void @mutual_recursion_0(i16 0)
 ; CHECK-NEXT:    ret void
@@ -199,15 +199,16 @@ define amdgpu_kernel void @kernel_lds_recursion() {
 ; CHECK: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) }
 ; CHECK: attributes #[[ATTR6:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
 ;.
-; CHECK: [[META0:![0-9]+]] = !{i32 0, i32 1}
-; CHECK: [[META1:![0-9]+]] = !{i32 0}
-; CHECK: [[META2:![0-9]+]] = !{i32 1}
-; CHECK: [[META3:![0-9]+]] = !{!5}
-; CHECK: [[META4:![0-9]+]] = distinct !{!5, !6}
-; CHECK: [[META5:![0-9]+]] = distinct !{!6}
-; CHECK: [[META6:![0-9]+]] = !{!8}
-; CHECK: [[META7:![0-9]+]] = distinct !{!8, !6}
-; CHECK: [[META8:![0-9]+]] = !{i32 2}
+; CHECK: [[META0]] = !{i32 0, i32 1}
+; CHECK: [[META1:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 400}
+; CHECK: [[META2]] = !{i32 0}
+; CHECK: [[META3]] = !{i32 1}
+; CHECK: [[META4]] = !{[[META5:![0-9]+]]}
+; CHECK: [[META5]] = distinct !{[[META5]], [[META6:![0-9]+]]}
+; CHECK: [[META6]] = distinct !{[[META6]]}
+; CHECK: [[META7]] = !{[[META8:![0-9]+]]}
+; CHECK: [[META8]] = distinct !{[[META8]], [[META6]]}
+; CHECK: [[META9]] = !{i32 2}
 ;.
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
 ; TABLE: {{.*}}

``````````

</details>


https://github.com/llvm/llvm-project/pull/101609