[llvm] c4e5425 - [Remarks] Emit optimization remarks for atomics generating CAS loop

Sat Aug 14 01:09:27 PDT 2021

Please change https://reviews.llvm.org/D106891 to allow write access by
non-AMDGPU users. As I can't comment on the revision, I'm leaving it here:

> New DT and LI calculations in O0 pipeline. If you cannot usefully
preserve them, then you should construct ORE in the pass, which will skip
expensive analysis if diagnostic hotness is not requested.

Regards,
Nikita

On Sat, Aug 14, 2021 at 6:44 AM Anshil Gandhi via llvm-commits <
llvm-commits at lists.llvm.org> wrote:

>
> Author: Anshil Gandhi
> Date: 2021-08-13T22:44:08-06:00
> New Revision: c4e5425aa579d21530ef1766d7144b38a347f247
>
> URL:
> https://github.com/llvm/llvm-project/commit/c4e5425aa579d21530ef1766d7144b38a347f247
> DIFF:
> https://github.com/llvm/llvm-project/commit/c4e5425aa579d21530ef1766d7144b38a347f247.diff
>
> LOG: [Remarks] Emit optimization remarks for atomics generating CAS loop
>
> Implements ORE in AtomicExpandPass to report atomics generating a compare
> and swap loop.
>
> Differential Revision: https://reviews.llvm.org/D106891
>
> Added:
>     clang/test/CodeGenCUDA/atomics-remarks-gfx90a.cu
>     clang/test/CodeGenOpenCL/atomics-remarks-gfx90a.cl
>     llvm/test/CodeGen/AMDGPU/atomics-remarks-gfx90a.ll
>
> Modified:
>     llvm/lib/CodeGen/AtomicExpandPass.cpp
>     llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
>     llvm/test/CodeGen/X86/O0-pipeline.ll
>     llvm/test/CodeGen/X86/opt-pipeline.ll
>
> Removed:
>
>
>
>
> ################################################################################
> diff  --git a/clang/test/CodeGenCUDA/atomics-remarks-gfx90a.cu
> b/clang/test/CodeGenCUDA/atomics-remarks-gfx90a.cu
> new file mode 100644
> index 0000000000000..96892286fd75e
> --- /dev/null
> +++ b/clang/test/CodeGenCUDA/atomics-remarks-gfx90a.cu
> @@ -0,0 +1,16 @@
> +// RUN: %clang_cc1 %s -triple=amdgcn-amd-amdhsa -fcuda-is-device \
> +// RUN:   -target-cpu gfx90a -Rpass=atomic-expand -S -o - 2>&1 | \
> +// RUN:   FileCheck %s --check-prefix=GFX90A-CAS
> +
> +// REQUIRES: amdgpu-registered-target
> +
> +#include "Inputs/cuda.h"
> +#include <stdatomic.h>
> +
> +// GFX90A-CAS: A compare and swap loop was generated for an atomic fadd
> operation at system memory scope
> +// GFX90A-CAS-LABEL: _Z14atomic_add_casPf
> +// GFX90A-CAS:  flat_atomic_cmpswap v0, v[2:3], v[4:5] glc
> +// GFX90A-CAS:  s_cbranch_execnz
> +__device__ float atomic_add_cas(float *p) {
> +  return __atomic_fetch_add(p, 1.0f, memory_order_relaxed);
> +}
>
> diff  --git a/clang/test/CodeGenOpenCL/atomics-remarks-gfx90a.cl
> b/clang/test/CodeGenOpenCL/atomics-remarks-gfx90a.cl
> new file mode 100644
> index 0000000000000..2d8b68f83b9d6
> --- /dev/null
> +++ b/clang/test/CodeGenOpenCL/atomics-remarks-gfx90a.cl
> @@ -0,0 +1,46 @@
> +// RUN: %clang_cc1 %s -cl-std=CL2.0 -O0 -triple=amdgcn-amd-amdhsa
> -target-cpu gfx90a \
> +// RUN:     -Rpass=atomic-expand -S -o - 2>&1 | \
> +// RUN:     FileCheck %s --check-prefix=REMARK
> +
> +// RUN: %clang_cc1 %s -cl-std=CL2.0 -O0 -triple=amdgcn-amd-amdhsa
> -target-cpu gfx90a \
> +// RUN:     -Rpass=atomic-expand -S -emit-llvm -o - 2>&1 | \
> +// RUN:     FileCheck %s --check-prefix=GFX90A-CAS
> +
> +// REQUIRES: amdgpu-registered-target
> +
> +typedef enum memory_order {
> +  memory_order_relaxed = __ATOMIC_RELAXED,
> +  memory_order_acquire = __ATOMIC_ACQUIRE,
> +  memory_order_release = __ATOMIC_RELEASE,
> +  memory_order_acq_rel = __ATOMIC_ACQ_REL,
> +  memory_order_seq_cst = __ATOMIC_SEQ_CST
> +} memory_order;
> +
> +typedef enum memory_scope {
> +  memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM,
> +  memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP,
> +  memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE,
> +  memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES,
> +#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups)
> +  memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP
> +#endif
> +} memory_scope;
> +
> +// REMARK: remark: A compare and swap loop was generated for an atomic
> fadd operation at workgroup-one-as memory scope [-Rpass=atomic-expand]
> +// REMARK: remark: A compare and swap loop was generated for an atomic
> fadd operation at agent-one-as memory scope [-Rpass=atomic-expand]
> +// REMARK: remark: A compare and swap loop was generated for an atomic
> fadd operation at one-as memory scope [-Rpass=atomic-expand]
> +// REMARK: remark: A compare and swap loop was generated for an atomic
> fadd operation at wavefront-one-as memory scope [-Rpass=atomic-expand]
> +// GFX90A-CAS-LABEL: @atomic_cas
> +// GFX90A-CAS: atomicrmw fadd float addrspace(1)* {{.*}}
> syncscope("workgroup-one-as") monotonic
> +// GFX90A-CAS: atomicrmw fadd float addrspace(1)* {{.*}}
> syncscope("agent-one-as") monotonic
> +// GFX90A-CAS: atomicrmw fadd float addrspace(1)* {{.*}}
> syncscope("one-as") monotonic
> +// GFX90A-CAS: atomicrmw fadd float addrspace(1)* {{.*}}
> syncscope("wavefront-one-as") monotonic
> +float atomic_cas(__global atomic_float *d, float a) {
> +  float ret1 = __opencl_atomic_fetch_add(d, a, memory_order_relaxed,
> memory_scope_work_group);
> +  float ret2 = __opencl_atomic_fetch_add(d, a, memory_order_relaxed,
> memory_scope_device);
> +  float ret3 = __opencl_atomic_fetch_add(d, a, memory_order_relaxed,
> memory_scope_all_svm_devices);
> +  float ret4 = __opencl_atomic_fetch_add(d, a, memory_order_relaxed,
> memory_scope_sub_group);
> +}
> +
> +
> +
>
> diff  --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp
> b/llvm/lib/CodeGen/AtomicExpandPass.cpp
> index 125a3be585cb5..5b5458e1058e8 100644
> --- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
> +++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
> @@ -17,6 +17,7 @@
>  #include "llvm/ADT/ArrayRef.h"
>  #include "llvm/ADT/STLExtras.h"
>  #include "llvm/ADT/SmallVector.h"
> +#include "llvm/Analysis/OptimizationRemarkEmitter.h"
>  #include "llvm/CodeGen/AtomicExpandUtils.h"
>  #include "llvm/CodeGen/RuntimeLibcalls.h"
>  #include "llvm/CodeGen/TargetLowering.h"
> @@ -58,6 +59,7 @@ namespace {
>
>    class AtomicExpand: public FunctionPass {
>      const TargetLowering *TLI = nullptr;
> +    OptimizationRemarkEmitter *ORE;
>
>    public:
>      static char ID; // Pass identification, replacement for typeid
> @@ -69,6 +71,7 @@ namespace {
>      bool runOnFunction(Function &F) override;
>
>    private:
> +    void getAnalysisUsage(AnalysisUsage &AU) const override;
>      bool bracketInstWithFences(Instruction *I, AtomicOrdering Order);
>      IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout
> &DL);
>      LoadInst *convertAtomicLoadToIntegerType(LoadInst *LI);
> @@ -165,11 +168,16 @@ static bool atomicSizeSupported(const TargetLowering
> *TLI, Inst *I) {
>           Size <= TLI->getMaxAtomicSizeInBitsSupported() / 8;
>  }
>
> +void AtomicExpand::getAnalysisUsage(AnalysisUsage &AU) const {
> +  AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
> +}
> +
>  bool AtomicExpand::runOnFunction(Function &F) {
>    auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
>    if (!TPC)
>      return false;
>
> +  ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
>    auto &TM = TPC->getTM<TargetMachine>();
>    if (!TM.getSubtargetImpl(F)->enableAtomicExpand())
>      return false;
> @@ -570,7 +578,9 @@ static Value *performAtomicOp(AtomicRMWInst::BinOp Op,
> IRBuilder<> &Builder,
>  }
>
>  bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
> -  switch (TLI->shouldExpandAtomicRMWInIR(AI)) {
> +  LLVMContext &Ctx = AI->getModule()->getContext();
> +  TargetLowering::AtomicExpansionKind Kind =
> TLI->shouldExpandAtomicRMWInIR(AI);
> +  switch (Kind) {
>    case TargetLoweringBase::AtomicExpansionKind::None:
>      return false;
>    case TargetLoweringBase::AtomicExpansionKind::LLSC: {
> @@ -600,6 +610,17 @@ bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst
> *AI) {
>        expandPartwordAtomicRMW(AI,
>
>  TargetLoweringBase::AtomicExpansionKind::CmpXChg);
>      } else {
> +      SmallVector<StringRef> SSNs;
> +      Ctx.getSyncScopeNames(SSNs);
> +      auto MemScope = SSNs[AI->getSyncScopeID()].empty()
> +                          ? "system"
> +                          : SSNs[AI->getSyncScopeID()];
> +      ORE->emit([&]() {
> +        return OptimizationRemark(DEBUG_TYPE, "Passed", AI->getFunction())
> +               << "A compare and swap loop was generated for an atomic "
> +               << AI->getOperationName(AI->getOperation()) << " operation
> at "
> +               << MemScope << " memory scope";
> +      });
>        expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun);
>      }
>      return true;
>
> diff  --git a/llvm/test/CodeGen/AMDGPU/atomics-remarks-gfx90a.ll
> b/llvm/test/CodeGen/AMDGPU/atomics-remarks-gfx90a.ll
> new file mode 100644
> index 0000000000000..240963cfe9009
> --- /dev/null
> +++ b/llvm/test/CodeGen/AMDGPU/atomics-remarks-gfx90a.ll
> @@ -0,0 +1,103 @@
> +; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs
> --pass-remarks=atomic-expand \
> +; RUN:      %s -o - 2>&1 | FileCheck %s --check-prefix=GFX90A-CAS
> +
> +; GFX90A-CAS: A compare and swap loop was generated for an atomic fadd
> operation at system memory scope
> +; GFX90A-CAS: A compare and swap loop was generated for an atomic fadd
> operation at agent memory scope
> +; GFX90A-CAS: A compare and swap loop was generated for an atomic fadd
> operation at workgroup memory scope
> +; GFX90A-CAS: A compare and swap loop was generated for an atomic fadd
> operation at wavefront memory scope
> +; GFX90A-CAS: A compare and swap loop was generated for an atomic fadd
> operation at singlethread memory scope
> +; GFX90A-CAS: A compare and swap loop was generated for an atomic fadd
> operation at one-as memory scope
> +; GFX90A-CAS: A compare and swap loop was generated for an atomic fadd
> operation at agent-one-as memory scope
> +; GFX90A-CAS: A compare and swap loop was generated for an atomic fadd
> operation at workgroup-one-as memory scope
> +; GFX90A-CAS: A compare and swap loop was generated for an atomic fadd
> operation at wavefront-one-as memory scope
> +; GFX90A-CAS: A compare and swap loop was generated for an atomic fadd
> operation at singlethread-one-as memory scope
> +
> +; GFX90A-CAS-LABEL: atomic_add_cas:
> +; GFX90A-CAS: flat_atomic_cmpswap v3, v[0:1], v[4:5] glc
> +; GFX90A-CAS: s_cbranch_execnz
> +define dso_local void @atomic_add_cas(float* %p, float %q) {
> +entry:
> +  %ret = atomicrmw fadd float* %p, float %q monotonic, align 4
> +  ret void
> +}
> +
> +; GFX90A-CAS-LABEL: atomic_add_cas_agent:
> +; GFX90A-CAS: flat_atomic_cmpswap v3, v[0:1], v[4:5] glc
> +; GFX90A-CAS: s_cbranch_execnz
> +define dso_local void @atomic_add_cas_agent(float* %p, float %q) {
> +entry:
> +  %ret = atomicrmw fadd float* %p, float %q syncscope("agent") monotonic,
> align 4
> +  ret void
> +}
> +
> +; GFX90A-CAS-LABEL: atomic_add_cas_workgroup:
> +; GFX90A-CAS: flat_atomic_cmpswap v3, v[0:1], v[4:5] glc
> +; GFX90A-CAS: s_cbranch_execnz
> +define dso_local void @atomic_add_cas_workgroup(float* %p, float %q) {
> +entry:
> +  %ret = atomicrmw fadd float* %p, float %q syncscope("workgroup")
> monotonic, align 4
> +  ret void
> +}
> +
> +; GFX90A-CAS-LABEL: atomic_add_cas_wavefront:
> +; GFX90A-CAS: flat_atomic_cmpswap v3, v[0:1], v[4:5] glc
> +; GFX90A-CAS: s_cbranch_execnz
> +define dso_local void @atomic_add_cas_wavefront(float* %p, float %q) {
> +entry:
> +  %ret = atomicrmw fadd float* %p, float %q syncscope("wavefront")
> monotonic, align 4
> +  ret void
> +}
> +
> +; GFX90A-CAS-LABEL: atomic_add_cas_singlethread:
> +; GFX90A-CAS: flat_atomic_cmpswap v3, v[0:1], v[4:5] glc
> +; GFX90A-CAS: s_cbranch_execnz
> +define dso_local void @atomic_add_cas_singlethread(float* %p, float %q) {
> +entry:
> +  %ret = atomicrmw fadd float* %p, float %q syncscope("singlethread")
> monotonic, align 4
> +  ret void
> +}
> +
> +; GFX90A-CAS-LABEL: atomic_add_cas_one_as:
> +; GFX90A-CAS: flat_atomic_cmpswap v3, v[0:1], v[4:5] glc
> +; GFX90A-CAS: s_cbranch_execnz
> +define dso_local void @atomic_add_cas_one_as(float* %p, float %q) {
> +entry:
> +  %ret = atomicrmw fadd float* %p, float %q syncscope("one-as")
> monotonic, align 4
> +  ret void
> +}
> +
> +; GFX90A-CAS-LABEL: atomic_add_cas_agent_one_as:
> +; GFX90A-CAS: flat_atomic_cmpswap v3, v[0:1], v[4:5] glc
> +; GFX90A-CAS: s_cbranch_execnz
> +define dso_local void @atomic_add_cas_agent_one_as(float* %p, float %q) {
> +entry:
> +  %ret = atomicrmw fadd float* %p, float %q syncscope("agent-one-as")
> monotonic, align 4
> +  ret void
> +}
> +
> +; GFX90A-CAS-LABEL: atomic_add_cas_workgroup_one_as:
> +; GFX90A-CAS: flat_atomic_cmpswap v3, v[0:1], v[4:5] glc
> +; GFX90A-CAS: s_cbranch_execnz
> +define dso_local void @atomic_add_cas_workgroup_one_as(float* %p, float
> %q) {
> +entry:
> +  %ret = atomicrmw fadd float* %p, float %q syncscope("workgroup-one-as")
> monotonic, align 4
> +  ret void
> +}
> +
> +; GFX90A-CAS-LABEL: atomic_add_cas_wavefront_one_as:
> +; GFX90A-CAS: flat_atomic_cmpswap v3, v[0:1], v[4:5] glc
> +; GFX90A-CAS: s_cbranch_execnz
> +define dso_local void @atomic_add_cas_wavefront_one_as(float* %p, float
> %q) {
> +entry:
> +  %ret = atomicrmw fadd float* %p, float %q syncscope("wavefront-one-as")
> monotonic, align 4
> +  ret void
> +}
> +
> +; GFX90A-CAS-LABEL: atomic_add_cas_singlethread_one_as:
> +; GFX90A-CAS: flat_atomic_cmpswap v3, v[0:1], v[4:5] glc
> +; GFX90A-CAS: s_cbranch_execnz
> +define dso_local void @atomic_add_cas_singlethread_one_as(float* %p,
> float %q) {
> +entry:
> +  %ret = atomicrmw fadd float* %p, float %q
> syncscope("singlethread-one-as") monotonic, align 4
> +  ret void
> +}
>
> diff  --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
> b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
> index 73909dc918f0a..dba871eee99fd 100644
> --- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
> +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
> @@ -44,6 +44,11 @@
>  ; GCN-O0-NEXT:    Lower OpenCL enqueued blocks
>  ; GCN-O0-NEXT:    Lower uses of LDS variables from non-kernel functions
>  ; GCN-O0-NEXT:    FunctionPass Manager
> +; GCN-O0-NEXT:      Dominator Tree Construction
> +; GCN-O0-NEXT:      Natural Loop Information
> +; GCN-O0-NEXT:      Lazy Branch Probability Analysis
> +; GCN-O0-NEXT:      Lazy Block Frequency Analysis
> +; GCN-O0-NEXT:      Optimization Remark Emitter
>  ; GCN-O0-NEXT:      Expand Atomic instructions
>  ; GCN-O0-NEXT:      Lower constant intrinsics
>  ; GCN-O0-NEXT:      Remove unreachable blocks from the CFG
> @@ -180,6 +185,11 @@
>  ; GCN-O1-NEXT:    Lower uses of LDS variables from non-kernel functions
>  ; GCN-O1-NEXT:    FunctionPass Manager
>  ; GCN-O1-NEXT:      Infer address spaces
> +; GCN-O1-NEXT:      Dominator Tree Construction
> +; GCN-O1-NEXT:      Natural Loop Information
> +; GCN-O1-NEXT:      Lazy Branch Probability Analysis
> +; GCN-O1-NEXT:      Lazy Block Frequency Analysis
> +; GCN-O1-NEXT:      Optimization Remark Emitter
>  ; GCN-O1-NEXT:      Expand Atomic instructions
>  ; GCN-O1-NEXT:      AMDGPU Promote Alloca
>  ; GCN-O1-NEXT:      Dominator Tree Construction
> @@ -431,6 +441,11 @@
>  ; GCN-O1-OPTS-NEXT:    Lower uses of LDS variables from non-kernel
> functions
>  ; GCN-O1-OPTS-NEXT:    FunctionPass Manager
>  ; GCN-O1-OPTS-NEXT:      Infer address spaces
> +; GCN-O1-OPTS-NEXT:      Dominator Tree Construction
> +; GCN-O1-OPTS-NEXT:      Natural Loop Information
> +; GCN-O1-OPTS-NEXT:      Lazy Branch Probability Analysis
> +; GCN-O1-OPTS-NEXT:      Lazy Block Frequency Analysis
> +; GCN-O1-OPTS-NEXT:      Optimization Remark Emitter
>  ; GCN-O1-OPTS-NEXT:      Expand Atomic instructions
>  ; GCN-O1-OPTS-NEXT:      AMDGPU Promote Alloca
>  ; GCN-O1-OPTS-NEXT:      Dominator Tree Construction
> @@ -715,6 +730,11 @@
>  ; GCN-O2-NEXT:    Lower uses of LDS variables from non-kernel functions
>  ; GCN-O2-NEXT:    FunctionPass Manager
>  ; GCN-O2-NEXT:      Infer address spaces
> +; GCN-O2-NEXT:      Dominator Tree Construction
> +; GCN-O2-NEXT:      Natural Loop Information
> +; GCN-O2-NEXT:      Lazy Branch Probability Analysis
> +; GCN-O2-NEXT:      Lazy Block Frequency Analysis
> +; GCN-O2-NEXT:      Optimization Remark Emitter
>  ; GCN-O2-NEXT:      Expand Atomic instructions
>  ; GCN-O2-NEXT:      AMDGPU Promote Alloca
>  ; GCN-O2-NEXT:      Dominator Tree Construction
> @@ -1001,6 +1021,11 @@
>  ; GCN-O3-NEXT:    Lower uses of LDS variables from non-kernel functions
>  ; GCN-O3-NEXT:    FunctionPass Manager
>  ; GCN-O3-NEXT:      Infer address spaces
> +; GCN-O3-NEXT:      Dominator Tree Construction
> +; GCN-O3-NEXT:      Natural Loop Information
> +; GCN-O3-NEXT:      Lazy Branch Probability Analysis
> +; GCN-O3-NEXT:      Lazy Block Frequency Analysis
> +; GCN-O3-NEXT:      Optimization Remark Emitter
>  ; GCN-O3-NEXT:      Expand Atomic instructions
>  ; GCN-O3-NEXT:      AMDGPU Promote Alloca
>  ; GCN-O3-NEXT:      Dominator Tree Construction
>
> diff  --git a/llvm/test/CodeGen/X86/O0-pipeline.ll
> b/llvm/test/CodeGen/X86/O0-pipeline.ll
> index bf3ae61660757..8f0275706996a 100644
> --- a/llvm/test/CodeGen/X86/O0-pipeline.ll
> +++ b/llvm/test/CodeGen/X86/O0-pipeline.ll
> @@ -10,13 +10,18 @@
>  ; CHECK-NEXT: Target Pass Configuration
>  ; CHECK-NEXT: Machine Module Information
>  ; CHECK-NEXT: Target Transform Information
> +; CHECK-NEXT: Profile summary info
>  ; CHECK-NEXT: Create Garbage Collector Module Metadata
>  ; CHECK-NEXT: Assumption Cache Tracker
> -; CHECK-NEXT: Profile summary info
>  ; CHECK-NEXT: Machine Branch Probability Analysis
>  ; CHECK-NEXT:   ModulePass Manager
>  ; CHECK-NEXT:     Pre-ISel Intrinsic Lowering
>  ; CHECK-NEXT:     FunctionPass Manager
> +; CHECK-NEXT:       Dominator Tree Construction
> +; CHECK-NEXT:       Natural Loop Information
> +; CHECK-NEXT:       Lazy Branch Probability Analysis
> +; CHECK-NEXT:       Lazy Block Frequency Analysis
> +; CHECK-NEXT:       Optimization Remark Emitter
>  ; CHECK-NEXT:       Expand Atomic instructions
>  ; CHECK-NEXT:       Lower AMX intrinsics
>  ; CHECK-NEXT:       Lower AMX type for load/store
>
> diff  --git a/llvm/test/CodeGen/X86/opt-pipeline.ll
> b/llvm/test/CodeGen/X86/opt-pipeline.ll
> index c809433a2fff8..a480d901160fc 100644
> --- a/llvm/test/CodeGen/X86/opt-pipeline.ll
> +++ b/llvm/test/CodeGen/X86/opt-pipeline.ll
> @@ -16,15 +16,20 @@
>  ; CHECK-NEXT: Target Pass Configuration
>  ; CHECK-NEXT: Machine Module Information
>  ; CHECK-NEXT: Target Transform Information
> +; CHECK-NEXT: Profile summary info
>  ; CHECK-NEXT: Type-Based Alias Analysis
>  ; CHECK-NEXT: Scoped NoAlias Alias Analysis
>  ; CHECK-NEXT: Assumption Cache Tracker
> -; CHECK-NEXT: Profile summary info
>  ; CHECK-NEXT: Create Garbage Collector Module Metadata
>  ; CHECK-NEXT: Machine Branch Probability Analysis
>  ; CHECK-NEXT:   ModulePass Manager
>  ; CHECK-NEXT:     Pre-ISel Intrinsic Lowering
>  ; CHECK-NEXT:     FunctionPass Manager
> +; CHECK-NEXT:       Dominator Tree Construction
> +; CHECK-NEXT:       Natural Loop Information
> +; CHECK-NEXT:       Lazy Branch Probability Analysis
> +; CHECK-NEXT:       Lazy Block Frequency Analysis
> +; CHECK-NEXT:       Optimization Remark Emitter
>  ; CHECK-NEXT:       Expand Atomic instructions
>  ; CHECK-NEXT:       Lower AMX intrinsics
>  ; CHECK-NEXT:       Lower AMX type for load/store
>
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20210814/5ca02ada/attachment-0001.html>