[llvm] [AMDGPU][NewPM] Format llc-pipeline-npm.ll better (PR #174161)
Aiden Grossman via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 1 22:27:40 PST 2026
https://github.com/boomanaiden154 updated https://github.com/llvm/llvm-project/pull/174161
From 854db8a219bcfe72cccd7822358e8a6f04ba86e3 Mon Sep 17 00:00:00 2001
From: Aiden Grossman <aidengrossman at google.com>
Date: Thu, 1 Jan 2026 21:30:08 +0000
Subject: [PATCH 1/2] [AMDGPU][NewPM] Format llc-pipeline-npm.ll better
This patch makes it so that we print the passes on separate lines, which
makes it much easier to read the pipeline and look at diffs with most
tooling.
---
llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll | 428 ++++++++++++++++++-
1 file changed, 421 insertions(+), 7 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll
index 4be7aea94b561..2979b4fea1c46 100644
--- a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll
+++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll
@@ -1,19 +1,433 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -enable-new-pm -mtriple=amdgcn--amdhsa -O0 -print-pipeline-passes < %s 2>&1 \
-; RUN: | FileCheck -check-prefix=GCN-O0 %s
+; RUN: | sed 's/,/,\n/g' | FileCheck -check-prefix=GCN-O0 %s
; RUN: llc -enable-new-pm -mtriple=amdgcn--amdhsa -print-pipeline-passes < %s 2>&1 \
-; RUN: | FileCheck -check-prefix=GCN-O2 %s
+; RUN: | sed 's/,/,\n/g' | FileCheck -check-prefix=GCN-O2 %s
; RUN: llc -O3 -enable-new-pm -mtriple=amdgcn--amdhsa -print-pipeline-passes < %s 2>&1 \
-; RUN: | FileCheck -check-prefix=GCN-O3 %s
+; RUN: | sed 's/,/,\n/g' | FileCheck -check-prefix=GCN-O3 %s
+; GCN-O0: require<MachineModuleAnalysis>,
+; GCN-O0: require<profile-summary>,
+; GCN-O0: require<collector-metadata>,
+; GCN-O0: require<runtime-libcall-info>,
+; GCN-O0: pre-isel-intrinsic-lowering,
+; GCN-O0: function(expand-ir-insts<O0>),
+; GCN-O0: amdgpu-remove-incompatible-functions,
+; GCN-O0: amdgpu-printf-runtime-binding,
+; GCN-O0: amdgpu-lower-ctor-dtor,
+; GCN-O0: function(amdgpu-uniform-intrinsic-combine),
+; GCN-O0: expand-variadics,
+; GCN-O0: amdgpu-always-inline,
+; GCN-O0: always-inline,
+; GCN-O0: amdgpu-export-kernel-runtime-handles,
+; GCN-O0: amdgpu-lower-exec-sync,
+; GCN-O0: amdgpu-sw-lower-lds,
+; GCN-O0: amdgpu-lower-module-lds,
+; GCN-O0: function(atomic-expand,
+; GCN-O0: verify,
+; GCN-O0: unreachableblockelim,
+; GCN-O0: ee-instrument<post-inline>,
+; GCN-O0: scalarize-masked-mem-intrin,
+; GCN-O0: expand-reductions,
+; GCN-O0: amdgpu-lower-kernel-arguments),
+; GCN-O0: amdgpu-lower-buffer-fat-pointers,
+; GCN-O0: amdgpu-lower-intrinsics,
+; GCN-O0: cgscc(function(lower-switch,
+; GCN-O0: lower-invoke,
+; GCN-O0: unreachableblockelim)),
+; GCN-O0: require<amdgpu-argument-usage>,
+; GCN-O0: cgscc(function(amdgpu-unify-divergent-exit-nodes,
+; GCN-O0: fix-irreducible,
+; GCN-O0: unify-loop-exits,
+; GCN-O0: StructurizeCFGPass,
+; GCN-O0: amdgpu-annotate-uniform,
+; GCN-O0: si-annotate-control-flow,
+; GCN-O0: amdgpu-rewrite-undef-for-phi,
+; GCN-O0: lcssa,
+; GCN-O0: require<uniformity>,
+; GCN-O0: callbr-prepare,
+; GCN-O0: safe-stack,
+; GCN-O0: stack-protector,
+; GCN-O0: verify)),
+; GCN-O0: cgscc(function(machine-function(amdgpu-isel,
+; GCN-O0: si-fix-sgpr-copies,
+; GCN-O0: si-i1-copies,
+; GCN-O0: finalize-isel,
+; GCN-O0: localstackalloc))),
+; GCN-O0: require<reg-usage>,
+; GCN-O0: cgscc(function(machine-function(reg-usage-propagation,
+; GCN-O0: phi-node-elimination,
+; GCN-O0: two-address-instruction,
+; GCN-O0: regallocfast,
+; GCN-O0: si-fix-vgpr-copies,
+; GCN-O0: remove-redundant-debug-values,
+; GCN-O0: fixup-statepoint-caller-saved,
+; GCN-O0: prolog-epilog,
+; GCN-O0: post-ra-pseudos,
+; GCN-O0: si-post-ra-bundler,
+; GCN-O0: fentry-insert,
+; GCN-O0: xray-instrumentation,
+; GCN-O0: si-memory-legalizer,
+; GCN-O0: si-insert-waitcnts,
+; GCN-O0: si-mode-register,
+; GCN-O0: si-late-branch-lowering,
+; GCN-O0: post-RA-hazard-rec,
+; GCN-O0: amdgpu-wait-sgpr-hazards,
+; GCN-O0: amdgpu-lower-vgpr-encoding,
+; GCN-O0: branch-relaxation))),
+; GCN-O0: require<reg-usage>,
+; GCN-O0: cgscc(function(machine-function(reg-usage-collector,
+; GCN-O0: remove-loads-into-fake-uses,
+; GCN-O0: live-debug-values,
+; GCN-O0: machine-sanmd,
+; GCN-O0: amdgpu-preload-kern-arg-prolog,
+; GCN-O0: stack-frame-layout,
+; GCN-O0: verify),
+; GCN-O0: free-machine-function))
-; GCN-O0: require<MachineModuleAnalysis>,require<profile-summary>,require<collector-metadata>,require<runtime-libcall-info>,pre-isel-intrinsic-lowering,function(expand-ir-insts<O0>),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,function(amdgpu-uniform-intrinsic-combine),expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-lower-exec-sync,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(atomic-expand,verify,unreachableblockelim,ee-instrument<post-inline>,scalarize-masked-mem-intrin,expand-reductions,amdgpu-lower-kernel-arguments),amdgpu-lower-buffer-fat-pointers,amdgpu-lower-intrinsics,cgscc(function(lower-switch,lower-invoke,unreachableblockelim)),require<amdgpu-argument-usage>,cgscc(function(amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa,require<uniformity>,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,localstackalloc))),require<reg-usage>,cgscc(function(machine-function(reg-usage-propagation,phi-node-elimination,two-address-instruction,regallocfast,si-fix-vgpr-copies,remove-redundant-debug-values,fixup-statepoint-caller-saved,prolog-epilog,post-ra-pseudos,si-post-ra-bundler,fentry-insert,xray-instrumentation,si-memory-legalizer,si-insert-waitcnts,si-mode-register,si-late-branch-lowering,post-RA-hazard-rec,amdgpu-wait-sgpr-hazards,amdgpu-lower-vgpr-encoding,branch-relaxation))),require<reg-usage>,cgscc(function(machine-function(reg-usage-collector,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,amdgpu-preload-kern-arg-prolog,stack-frame-layout,verify),free-machine-function))
+; GCN-O2: require<MachineModuleAnalysis>,
+; GCN-O2: require<profile-summary>,
+; GCN-O2: require<collector-metadata>,
+; GCN-O2: require<runtime-libcall-info>,
+; GCN-O2: pre-isel-intrinsic-lowering,
+; GCN-O2: function(expand-ir-insts<O2>),
+; GCN-O2: amdgpu-remove-incompatible-functions,
+; GCN-O2: amdgpu-printf-runtime-binding,
+; GCN-O2: amdgpu-lower-ctor-dtor,
+; GCN-O2: function(amdgpu-image-intrinsic-opt,
+; GCN-O2: amdgpu-uniform-intrinsic-combine),
+; GCN-O2: expand-variadics,
+; GCN-O2: amdgpu-always-inline,
+; GCN-O2: always-inline,
+; GCN-O2: amdgpu-export-kernel-runtime-handles,
+; GCN-O2: amdgpu-lower-exec-sync,
+; GCN-O2: amdgpu-sw-lower-lds,
+; GCN-O2: amdgpu-lower-module-lds,
+; GCN-O2: function(amdgpu-atomic-optimizer,
+; GCN-O2: atomic-expand,
+; GCN-O2: amdgpu-promote-alloca,
+; GCN-O2: separate-const-offset-from-gep<>,
+; GCN-O2: slsr,
+; GCN-O2: early-cse<>,
+; GCN-O2: nary-reassociate,
+; GCN-O2: early-cse<>,
+; GCN-O2: amdgpu-codegenprepare,
+; GCN-O2: loop-mssa(licm<allowspeculation>),
+; GCN-O2: verify,
+; GCN-O2: loop-mssa(canon-freeze,
+; GCN-O2: loop-reduce),
+; GCN-O2: mergeicmps,
+; GCN-O2: expand-memcmp,
+; GCN-O2: unreachableblockelim,
+; GCN-O2: consthoist,
+; GCN-O2: replace-with-veclib,
+; GCN-O2: partially-inline-libcalls,
+; GCN-O2: ee-instrument<post-inline>,
+; GCN-O2: scalarize-masked-mem-intrin,
+; GCN-O2: expand-reductions,
+; GCN-O2: early-cse<>),
+; GCN-O2: amdgpu-preload-kernel-arguments,
+; GCN-O2: function(amdgpu-lower-kernel-arguments,
+; GCN-O2: codegenprepare,
+; GCN-O2: load-store-vectorizer),
+; GCN-O2: amdgpu-lower-buffer-fat-pointers,
+; GCN-O2: amdgpu-lower-intrinsics,
+; GCN-O2: cgscc(function(lower-switch,
+; GCN-O2: lower-invoke,
+; GCN-O2: unreachableblockelim)),
+; GCN-O2: require<amdgpu-argument-usage>,
+; GCN-O2: cgscc(function(flatten-cfg,
+; GCN-O2: sink,
+; GCN-O2: amdgpu-late-codegenprepare,
+; GCN-O2: amdgpu-unify-divergent-exit-nodes,
+; GCN-O2: fix-irreducible,
+; GCN-O2: unify-loop-exits,
+; GCN-O2: StructurizeCFGPass,
+; GCN-O2: amdgpu-annotate-uniform,
+; GCN-O2: si-annotate-control-flow,
+; GCN-O2: amdgpu-rewrite-undef-for-phi,
+; GCN-O2: lcssa)),
+; GCN-O2: amdgpu-perf-hint,
+; GCN-O2: cgscc(function(require<uniformity>,
+; GCN-O2: objc-arc-contract,
+; GCN-O2: callbr-prepare,
+; GCN-O2: safe-stack,
+; GCN-O2: stack-protector,
+; GCN-O2: verify)),
+; GCN-O2: cgscc(function(machine-function(amdgpu-isel,
+; GCN-O2: si-fix-sgpr-copies,
+; GCN-O2: si-i1-copies,
+; GCN-O2: finalize-isel,
+; GCN-O2: early-tailduplication,
+; GCN-O2: opt-phis,
+; GCN-O2: stack-coloring,
+; GCN-O2: localstackalloc,
+; GCN-O2: dead-mi-elimination,
+; GCN-O2: early-machinelicm,
+; GCN-O2: machine-cse,
+; GCN-O2: machine-sink,
+; GCN-O2: peephole-opt,
+; GCN-O2: dead-mi-elimination,
+; GCN-O2: si-fold-operands,
+; GCN-O2: gcn-dpp-combine,
+; GCN-O2: si-load-store-opt,
+; GCN-O2: si-peephole-sdwa,
+; GCN-O2: early-machinelicm,
+; GCN-O2: machine-cse,
+; GCN-O2: si-fold-operands,
+; GCN-O2: dead-mi-elimination,
+; GCN-O2: si-shrink-instructions))),
+; GCN-O2: require<reg-usage>,
+; GCN-O2: cgscc(function(machine-function(reg-usage-propagation,
+; GCN-O2: amdgpu-prepare-agpr-alloc,
+; GCN-O2: detect-dead-lanes,
+; GCN-O2: dead-mi-elimination,
+; GCN-O2: init-undef,
+; GCN-O2: process-imp-defs,
+; GCN-O2: unreachable-mbb-elimination,
+; GCN-O2: require<live-vars>,
+; GCN-O2: si-opt-vgpr-liverange,
+; GCN-O2: require<machine-loops>,
+; GCN-O2: phi-node-elimination,
+; GCN-O2: si-lower-control-flow,
+; GCN-O2: two-address-instruction,
+; GCN-O2: register-coalescer,
+; GCN-O2: rename-independent-subregs,
+; GCN-O2: amdgpu-rewrite-partial-reg-uses,
+; GCN-O2: machine-scheduler,
+; GCN-O2: amdgpu-pre-ra-optimizations,
+; GCN-O2: si-wqm,
+; GCN-O2: si-optimize-exec-masking-pre-ra,
+; GCN-O2: si-form-memory-clauses,
+; GCN-O2: amdgpu-pre-ra-long-branch-reg,
+; GCN-O2: greedy<sgpr>,
+; GCN-O2: virt-reg-rewriter<no-clear-vregs>,
+; GCN-O2: stack-slot-coloring,
+; GCN-O2: si-lower-sgpr-spills,
+; GCN-O2: si-pre-allocate-wwm-regs,
+; GCN-O2: greedy<wwm>,
+; GCN-O2: si-lower-wwm-copies,
+; GCN-O2: virt-reg-rewriter<no-clear-vregs>,
+; GCN-O2: amdgpu-reserve-wwm-regs,
+; GCN-O2: greedy<vgpr>,
+; GCN-O2: amdgpu-nsa-reassign,
+; GCN-O2: virt-reg-rewriter,
+; GCN-O2: amdgpu-mark-last-scratch-load,
+; GCN-O2: stack-slot-coloring,
+; GCN-O2: machine-cp,
+; GCN-O2: machinelicm,
+; GCN-O2: si-fix-vgpr-copies,
+; GCN-O2: si-optimize-exec-masking,
+; GCN-O2: remove-redundant-debug-values,
+; GCN-O2: fixup-statepoint-caller-saved,
+; GCN-O2: postra-machine-sink,
+; GCN-O2: shrink-wrap,
+; GCN-O2: prolog-epilog,
+; GCN-O2: machine-latecleanup,
+; GCN-O2: branch-folder,
+; GCN-O2: tailduplication,
+; GCN-O2: machine-cp,
+; GCN-O2: post-ra-pseudos,
+; GCN-O2: si-shrink-instructions,
+; GCN-O2: si-post-ra-bundler,
+; GCN-O2: postmisched,
+; GCN-O2: block-placement,
+; GCN-O2: fentry-insert,
+; GCN-O2: xray-instrumentation,
+; GCN-O2: gcn-create-vopd,
+; GCN-O2: si-memory-legalizer,
+; GCN-O2: si-insert-waitcnts,
+; GCN-O2: si-mode-register,
+; GCN-O2: si-insert-hard-clauses,
+; GCN-O2: si-late-branch-lowering,
+; GCN-O2: si-pre-emit-peephole,
+; GCN-O2: post-RA-hazard-rec,
+; GCN-O2: amdgpu-wait-sgpr-hazards,
+; GCN-O2: amdgpu-lower-vgpr-encoding,
+; GCN-O2: amdgpu-insert-delay-alu,
+; GCN-O2: branch-relaxation))),
+; GCN-O2: require<reg-usage>,
+; GCN-O2: cgscc(function(machine-function(reg-usage-collector,
+; GCN-O2: remove-loads-into-fake-uses,
+; GCN-O2: live-debug-values,
+; GCN-O2: machine-sanmd,
+; GCN-O2: amdgpu-preload-kern-arg-prolog,
+; GCN-O2: stack-frame-layout,
+; GCN-O2: verify),
+; GCN-O2: free-machine-function))
-; GCN-O2: require<MachineModuleAnalysis>,require<profile-summary>,require<collector-metadata>,require<runtime-libcall-info>,pre-isel-intrinsic-lowering,function(expand-ir-insts<O2>),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,function(amdgpu-image-intrinsic-opt,amdgpu-uniform-intrinsic-combine),expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-lower-exec-sync,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(amdgpu-atomic-optimizer,atomic-expand,amdgpu-promote-alloca,separate-const-offset-from-gep<>,slsr,early-cse<>,nary-reassociate,early-cse<>,amdgpu-codegenprepare,loop-mssa(licm<allowspeculation>),verify,loop-mssa(canon-freeze,loop-reduce),mergeicmps,expand-memcmp,unreachableblockelim,consthoist,replace-with-veclib,partially-inline-libcalls,ee-instrument<post-inline>,scalarize-masked-mem-intrin,expand-reductions,early-cse<>),amdgpu-preload-kernel-arguments,function(amdgpu-lower-kernel-arguments,codegenprepare,load-store-vectorizer),amdgpu-lower-buffer-fat-pointers,amdgpu-lower-intrinsics,cgscc(function(lower-switch,lower-invoke,unreachableblockelim)),require<amdgpu-argument-usage>,cgscc(function(flatten-cfg,sink,amdgpu-late-codegenprepare,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa)),amdgpu-perf-hint,cgscc(function(require<uniformity>,objc-arc-contract,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,early-tailduplication,opt-phis,stack-coloring,localstackalloc,dead-mi-elimination,early-machinelicm,machine-cse,machine-sink,peephole-opt,dead-mi-elimination,si-fold-operands,gcn-dpp-combine,si-load-store-opt,si-peephole-sdwa,early-machinelicm,machine-cse,si-fold-operands,dead-mi-elimination,si-shrink-instructions))),require<reg-usage>,cgscc(function(machine-function(reg-usage-propagation,amdgpu-prepare-agpr-alloc,detect-dead-lanes,dead-mi-elimination,init-undef,process-imp-defs,unreachable-mbb-elimination,require<live-vars>,si-opt-vgpr-liverange,require<machine-loops>,phi-node-elimination,si-lower-control-flow,two-address-instruction,register-coalescer,rename-independent-subregs,amdgpu-rewrite-partial-reg-uses,machine-scheduler,amdgpu-pre-ra-optimizations,si-wqm,si-optimize-exec-masking-pre-ra,si-form-memory-clauses,amdgpu-pre-ra-long-branch-reg,greedy<sgpr>,virt-reg-rewriter<no-clear-vregs>,stack-slot-coloring,si-lower-sgpr-spills,si-pre-allocate-wwm-regs,greedy<wwm>,si-lower-wwm-copies,virt-reg-rewriter<no-clear-vregs>,amdgpu-reserve-wwm-regs,greedy<vgpr>,amdgpu-nsa-reassign,virt-reg-rewriter,amdgpu-mark-last-scratch-load,stack-slot-coloring,machine-cp,machinelicm,si-fix-vgpr-copies,si-optimize-exec-masking,remove-redundant-debug-values,fixup-statepoint-caller-saved,postra-machine-sink,shrink-wrap,prolog-epilog,machine-latecleanup,branch-folder,tailduplication,machine-cp,post-ra-pseudos,si-shrink-instructions,si-post-ra-bundler,postmisched,block-placement,fentry-insert,xray-instrumentation,gcn-create-vopd,si-memory-legalizer,si-insert-waitcnts,si-mode-register,si-insert-hard-clauses,si-late-branch-lowering,si-pre-emit-peephole,post-RA-hazard-rec,amdgpu-wait-sgpr-hazards,amdgpu-lower-vgpr-encoding,amdgpu-insert-delay-alu,branch-relaxation))),require<reg-usage>,cgscc(function(machine-function(reg-usage-collector,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,amdgpu-preload-kern-arg-prolog,stack-frame-layout,verify),free-machine-function))
-
-; GCN-O3: require<MachineModuleAnalysis>,require<profile-summary>,require<collector-metadata>,require<runtime-libcall-info>,pre-isel-intrinsic-lowering,function(expand-ir-insts<O3>),amdgpu-remove-incompatible-functions,amdgpu-printf-runtime-binding,amdgpu-lower-ctor-dtor,function(amdgpu-image-intrinsic-opt,amdgpu-uniform-intrinsic-combine),expand-variadics,amdgpu-always-inline,always-inline,amdgpu-export-kernel-runtime-handles,amdgpu-lower-exec-sync,amdgpu-sw-lower-lds,amdgpu-lower-module-lds,function(amdgpu-atomic-optimizer,atomic-expand,amdgpu-promote-alloca,separate-const-offset-from-gep<>,slsr,gvn<>,nary-reassociate,early-cse<>,amdgpu-codegenprepare,loop-mssa(licm<allowspeculation>),verify,loop-mssa(canon-freeze,loop-reduce),mergeicmps,expand-memcmp,unreachableblockelim,consthoist,replace-with-veclib,partially-inline-libcalls,ee-instrument<post-inline>,scalarize-masked-mem-intrin,expand-reductions,gvn<>),amdgpu-preload-kernel-arguments,function(amdgpu-lower-kernel-arguments,codegenprepare,load-store-vectorizer),amdgpu-lower-buffer-fat-pointers,amdgpu-lower-intrinsics,cgscc(function(lower-switch,lower-invoke,unreachableblockelim)),require<amdgpu-argument-usage>,cgscc(function(flatten-cfg,sink,amdgpu-late-codegenprepare,amdgpu-unify-divergent-exit-nodes,fix-irreducible,unify-loop-exits,StructurizeCFGPass,amdgpu-annotate-uniform,si-annotate-control-flow,amdgpu-rewrite-undef-for-phi,lcssa)),amdgpu-perf-hint,cgscc(function(require<uniformity>,objc-arc-contract,callbr-prepare,safe-stack,stack-protector,verify)),cgscc(function(machine-function(amdgpu-isel,si-fix-sgpr-copies,si-i1-copies,finalize-isel,early-tailduplication,opt-phis,stack-coloring,localstackalloc,dead-mi-elimination,early-machinelicm,machine-cse,machine-sink,peephole-opt,dead-mi-elimination,si-fold-operands,gcn-dpp-combine,si-load-store-opt,si-peephole-sdwa,early-machinelicm,machine-cse,si-fold-operands,dead-mi-elimination,si-shrink-instructions))),require<reg-usage>,cgscc(function(machine-function(reg-usage-propagation,amdgpu-prepare-agpr-alloc,detect-dead-lanes,dead-mi-elimination,init-undef,process-imp-defs,unreachable-mbb-elimination,require<live-vars>,si-opt-vgpr-liverange,require<machine-loops>,phi-node-elimination,si-lower-control-flow,two-address-instruction,register-coalescer,rename-independent-subregs,amdgpu-rewrite-partial-reg-uses,machine-scheduler,amdgpu-pre-ra-optimizations,si-wqm,si-optimize-exec-masking-pre-ra,si-form-memory-clauses,amdgpu-pre-ra-long-branch-reg,greedy<sgpr>,virt-reg-rewriter<no-clear-vregs>,stack-slot-coloring,si-lower-sgpr-spills,si-pre-allocate-wwm-regs,greedy<wwm>,si-lower-wwm-copies,virt-reg-rewriter<no-clear-vregs>,amdgpu-reserve-wwm-regs,greedy<vgpr>,amdgpu-nsa-reassign,virt-reg-rewriter,amdgpu-mark-last-scratch-load,stack-slot-coloring,machine-cp,machinelicm,si-fix-vgpr-copies,si-optimize-exec-masking,remove-redundant-debug-values,fixup-statepoint-caller-saved,postra-machine-sink,shrink-wrap,prolog-epilog,machine-latecleanup,branch-folder,tailduplication,machine-cp,post-ra-pseudos,si-shrink-instructions,si-post-ra-bundler,postmisched,block-placement,fentry-insert,xray-instrumentation,gcn-create-vopd,si-memory-legalizer,si-insert-waitcnts,si-mode-register,si-insert-hard-clauses,si-late-branch-lowering,si-pre-emit-peephole,post-RA-hazard-rec,amdgpu-wait-sgpr-hazards,amdgpu-lower-vgpr-encoding,amdgpu-insert-delay-alu,branch-relaxation))),require<reg-usage>,cgscc(function(machine-function(reg-usage-collector,remove-loads-into-fake-uses,live-debug-values,machine-sanmd,amdgpu-preload-kern-arg-prolog,stack-frame-layout,verify),free-machine-function))
+; GCN-O3: require<MachineModuleAnalysis>,
+; GCN-O3: require<profile-summary>,
+; GCN-O3: require<collector-metadata>,
+; GCN-O3: require<runtime-libcall-info>,
+; GCN-O3: pre-isel-intrinsic-lowering,
+; GCN-O3: function(expand-ir-insts<O3>),
+; GCN-O3: amdgpu-remove-incompatible-functions,
+; GCN-O3: amdgpu-printf-runtime-binding,
+; GCN-O3: amdgpu-lower-ctor-dtor,
+; GCN-O3: function(amdgpu-image-intrinsic-opt,
+; GCN-O3: amdgpu-uniform-intrinsic-combine),
+; GCN-O3: expand-variadics,
+; GCN-O3: amdgpu-always-inline,
+; GCN-O3: always-inline,
+; GCN-O3: amdgpu-export-kernel-runtime-handles,
+; GCN-O3: amdgpu-lower-exec-sync,
+; GCN-O3: amdgpu-sw-lower-lds,
+; GCN-O3: amdgpu-lower-module-lds,
+; GCN-O3: function(amdgpu-atomic-optimizer,
+; GCN-O3: atomic-expand,
+; GCN-O3: amdgpu-promote-alloca,
+; GCN-O3: separate-const-offset-from-gep<>,
+; GCN-O3: slsr,
+; GCN-O3: gvn<>,
+; GCN-O3: nary-reassociate,
+; GCN-O3: early-cse<>,
+; GCN-O3: amdgpu-codegenprepare,
+; GCN-O3: loop-mssa(licm<allowspeculation>),
+; GCN-O3: verify,
+; GCN-O3: loop-mssa(canon-freeze,
+; GCN-O3: loop-reduce),
+; GCN-O3: mergeicmps,
+; GCN-O3: expand-memcmp,
+; GCN-O3: unreachableblockelim,
+; GCN-O3: consthoist,
+; GCN-O3: replace-with-veclib,
+; GCN-O3: partially-inline-libcalls,
+; GCN-O3: ee-instrument<post-inline>,
+; GCN-O3: scalarize-masked-mem-intrin,
+; GCN-O3: expand-reductions,
+; GCN-O3: gvn<>),
+; GCN-O3: amdgpu-preload-kernel-arguments,
+; GCN-O3: function(amdgpu-lower-kernel-arguments,
+; GCN-O3: codegenprepare,
+; GCN-O3: load-store-vectorizer),
+; GCN-O3: amdgpu-lower-buffer-fat-pointers,
+; GCN-O3: amdgpu-lower-intrinsics,
+; GCN-O3: cgscc(function(lower-switch,
+; GCN-O3: lower-invoke,
+; GCN-O3: unreachableblockelim)),
+; GCN-O3: require<amdgpu-argument-usage>,
+; GCN-O3: cgscc(function(flatten-cfg,
+; GCN-O3: sink,
+; GCN-O3: amdgpu-late-codegenprepare,
+; GCN-O3: amdgpu-unify-divergent-exit-nodes,
+; GCN-O3: fix-irreducible,
+; GCN-O3: unify-loop-exits,
+; GCN-O3: StructurizeCFGPass,
+; GCN-O3: amdgpu-annotate-uniform,
+; GCN-O3: si-annotate-control-flow,
+; GCN-O3: amdgpu-rewrite-undef-for-phi,
+; GCN-O3: lcssa)),
+; GCN-O3: amdgpu-perf-hint,
+; GCN-O3: cgscc(function(require<uniformity>,
+; GCN-O3: objc-arc-contract,
+; GCN-O3: callbr-prepare,
+; GCN-O3: safe-stack,
+; GCN-O3: stack-protector,
+; GCN-O3: verify)),
+; GCN-O3: cgscc(function(machine-function(amdgpu-isel,
+; GCN-O3: si-fix-sgpr-copies,
+; GCN-O3: si-i1-copies,
+; GCN-O3: finalize-isel,
+; GCN-O3: early-tailduplication,
+; GCN-O3: opt-phis,
+; GCN-O3: stack-coloring,
+; GCN-O3: localstackalloc,
+; GCN-O3: dead-mi-elimination,
+; GCN-O3: early-machinelicm,
+; GCN-O3: machine-cse,
+; GCN-O3: machine-sink,
+; GCN-O3: peephole-opt,
+; GCN-O3: dead-mi-elimination,
+; GCN-O3: si-fold-operands,
+; GCN-O3: gcn-dpp-combine,
+; GCN-O3: si-load-store-opt,
+; GCN-O3: si-peephole-sdwa,
+; GCN-O3: early-machinelicm,
+; GCN-O3: machine-cse,
+; GCN-O3: si-fold-operands,
+; GCN-O3: dead-mi-elimination,
+; GCN-O3: si-shrink-instructions))),
+; GCN-O3: require<reg-usage>,
+; GCN-O3: cgscc(function(machine-function(reg-usage-propagation,
+; GCN-O3: amdgpu-prepare-agpr-alloc,
+; GCN-O3: detect-dead-lanes,
+; GCN-O3: dead-mi-elimination,
+; GCN-O3: init-undef,
+; GCN-O3: process-imp-defs,
+; GCN-O3: unreachable-mbb-elimination,
+; GCN-O3: require<live-vars>,
+; GCN-O3: si-opt-vgpr-liverange,
+; GCN-O3: require<machine-loops>,
+; GCN-O3: phi-node-elimination,
+; GCN-O3: si-lower-control-flow,
+; GCN-O3: two-address-instruction,
+; GCN-O3: register-coalescer,
+; GCN-O3: rename-independent-subregs,
+; GCN-O3: amdgpu-rewrite-partial-reg-uses,
+; GCN-O3: machine-scheduler,
+; GCN-O3: amdgpu-pre-ra-optimizations,
+; GCN-O3: si-wqm,
+; GCN-O3: si-optimize-exec-masking-pre-ra,
+; GCN-O3: si-form-memory-clauses,
+; GCN-O3: amdgpu-pre-ra-long-branch-reg,
+; GCN-O3: greedy<sgpr>,
+; GCN-O3: virt-reg-rewriter<no-clear-vregs>,
+; GCN-O3: stack-slot-coloring,
+; GCN-O3: si-lower-sgpr-spills,
+; GCN-O3: si-pre-allocate-wwm-regs,
+; GCN-O3: greedy<wwm>,
+; GCN-O3: si-lower-wwm-copies,
+; GCN-O3: virt-reg-rewriter<no-clear-vregs>,
+; GCN-O3: amdgpu-reserve-wwm-regs,
+; GCN-O3: greedy<vgpr>,
+; GCN-O3: amdgpu-nsa-reassign,
+; GCN-O3: virt-reg-rewriter,
+; GCN-O3: amdgpu-mark-last-scratch-load,
+; GCN-O3: stack-slot-coloring,
+; GCN-O3: machine-cp,
+; GCN-O3: machinelicm,
+; GCN-O3: si-fix-vgpr-copies,
+; GCN-O3: si-optimize-exec-masking,
+; GCN-O3: remove-redundant-debug-values,
+; GCN-O3: fixup-statepoint-caller-saved,
+; GCN-O3: postra-machine-sink,
+; GCN-O3: shrink-wrap,
+; GCN-O3: prolog-epilog,
+; GCN-O3: machine-latecleanup,
+; GCN-O3: branch-folder,
+; GCN-O3: tailduplication,
+; GCN-O3: machine-cp,
+; GCN-O3: post-ra-pseudos,
+; GCN-O3: si-shrink-instructions,
+; GCN-O3: si-post-ra-bundler,
+; GCN-O3: postmisched,
+; GCN-O3: block-placement,
+; GCN-O3: fentry-insert,
+; GCN-O3: xray-instrumentation,
+; GCN-O3: gcn-create-vopd,
+; GCN-O3: si-memory-legalizer,
+; GCN-O3: si-insert-waitcnts,
+; GCN-O3: si-mode-register,
+; GCN-O3: si-insert-hard-clauses,
+; GCN-O3: si-late-branch-lowering,
+; GCN-O3: si-pre-emit-peephole,
+; GCN-O3: post-RA-hazard-rec,
+; GCN-O3: amdgpu-wait-sgpr-hazards,
+; GCN-O3: amdgpu-lower-vgpr-encoding,
+; GCN-O3: amdgpu-insert-delay-alu,
+; GCN-O3: branch-relaxation))),
+; GCN-O3: require<reg-usage>,
+; GCN-O3: cgscc(function(machine-function(reg-usage-collector,
+; GCN-O3: remove-loads-into-fake-uses,
+; GCN-O3: live-debug-values,
+; GCN-O3: machine-sanmd,
+; GCN-O3: amdgpu-preload-kern-arg-prolog,
+; GCN-O3: stack-frame-layout,
+; GCN-O3: verify),
+; GCN-O3: free-machine-function))
define void @empty() {
ret void
From f28f956d92c0a6cb9574219f9c64bc3cc0b4074a Mon Sep 17 00:00:00 2001
From: Aiden Grossman <aidengrossman at google.com>
Date: Fri, 2 Jan 2026 06:27:29 +0000
Subject: [PATCH 2/2] feedback
---
llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll | 830 +++++++++----------
1 file changed, 415 insertions(+), 415 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll
index 2979b4fea1c46..17320a52162a4 100644
--- a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll
+++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll
@@ -9,425 +9,425 @@
; RUN: | sed 's/,/,\n/g' | FileCheck -check-prefix=GCN-O3 %s
; GCN-O0: require<MachineModuleAnalysis>,
-; GCN-O0: require<profile-summary>,
-; GCN-O0: require<collector-metadata>,
-; GCN-O0: require<runtime-libcall-info>,
-; GCN-O0: pre-isel-intrinsic-lowering,
-; GCN-O0: function(expand-ir-insts<O0>),
-; GCN-O0: amdgpu-remove-incompatible-functions,
-; GCN-O0: amdgpu-printf-runtime-binding,
-; GCN-O0: amdgpu-lower-ctor-dtor,
-; GCN-O0: function(amdgpu-uniform-intrinsic-combine),
-; GCN-O0: expand-variadics,
-; GCN-O0: amdgpu-always-inline,
-; GCN-O0: always-inline,
-; GCN-O0: amdgpu-export-kernel-runtime-handles,
-; GCN-O0: amdgpu-lower-exec-sync,
-; GCN-O0: amdgpu-sw-lower-lds,
-; GCN-O0: amdgpu-lower-module-lds,
-; GCN-O0: function(atomic-expand,
-; GCN-O0: verify,
-; GCN-O0: unreachableblockelim,
-; GCN-O0: ee-instrument<post-inline>,
-; GCN-O0: scalarize-masked-mem-intrin,
-; GCN-O0: expand-reductions,
-; GCN-O0: amdgpu-lower-kernel-arguments),
-; GCN-O0: amdgpu-lower-buffer-fat-pointers,
-; GCN-O0: amdgpu-lower-intrinsics,
-; GCN-O0: cgscc(function(lower-switch,
-; GCN-O0: lower-invoke,
-; GCN-O0: unreachableblockelim)),
-; GCN-O0: require<amdgpu-argument-usage>,
-; GCN-O0: cgscc(function(amdgpu-unify-divergent-exit-nodes,
-; GCN-O0: fix-irreducible,
-; GCN-O0: unify-loop-exits,
-; GCN-O0: StructurizeCFGPass,
-; GCN-O0: amdgpu-annotate-uniform,
-; GCN-O0: si-annotate-control-flow,
-; GCN-O0: amdgpu-rewrite-undef-for-phi,
-; GCN-O0: lcssa,
-; GCN-O0: require<uniformity>,
-; GCN-O0: callbr-prepare,
-; GCN-O0: safe-stack,
-; GCN-O0: stack-protector,
-; GCN-O0: verify)),
-; GCN-O0: cgscc(function(machine-function(amdgpu-isel,
-; GCN-O0: si-fix-sgpr-copies,
-; GCN-O0: si-i1-copies,
-; GCN-O0: finalize-isel,
-; GCN-O0: localstackalloc))),
-; GCN-O0: require<reg-usage>,
-; GCN-O0: cgscc(function(machine-function(reg-usage-propagation,
-; GCN-O0: phi-node-elimination,
-; GCN-O0: two-address-instruction,
-; GCN-O0: regallocfast,
-; GCN-O0: si-fix-vgpr-copies,
-; GCN-O0: remove-redundant-debug-values,
-; GCN-O0: fixup-statepoint-caller-saved,
-; GCN-O0: prolog-epilog,
-; GCN-O0: post-ra-pseudos,
-; GCN-O0: si-post-ra-bundler,
-; GCN-O0: fentry-insert,
-; GCN-O0: xray-instrumentation,
-; GCN-O0: si-memory-legalizer,
-; GCN-O0: si-insert-waitcnts,
-; GCN-O0: si-mode-register,
-; GCN-O0: si-late-branch-lowering,
-; GCN-O0: post-RA-hazard-rec,
-; GCN-O0: amdgpu-wait-sgpr-hazards,
-; GCN-O0: amdgpu-lower-vgpr-encoding,
-; GCN-O0: branch-relaxation))),
-; GCN-O0: require<reg-usage>,
-; GCN-O0: cgscc(function(machine-function(reg-usage-collector,
-; GCN-O0: remove-loads-into-fake-uses,
-; GCN-O0: live-debug-values,
-; GCN-O0: machine-sanmd,
-; GCN-O0: amdgpu-preload-kern-arg-prolog,
-; GCN-O0: stack-frame-layout,
-; GCN-O0: verify),
-; GCN-O0: free-machine-function))
+; GCN-O0-NEXT: require<profile-summary>,
+; GCN-O0-NEXT: require<collector-metadata>,
+; GCN-O0-NEXT: require<runtime-libcall-info>,
+; GCN-O0-NEXT: pre-isel-intrinsic-lowering,
+; GCN-O0-NEXT: function(expand-ir-insts<O0>),
+; GCN-O0-NEXT: amdgpu-remove-incompatible-functions,
+; GCN-O0-NEXT: amdgpu-printf-runtime-binding,
+; GCN-O0-NEXT: amdgpu-lower-ctor-dtor,
+; GCN-O0-NEXT: function(amdgpu-uniform-intrinsic-combine),
+; GCN-O0-NEXT: expand-variadics,
+; GCN-O0-NEXT: amdgpu-always-inline,
+; GCN-O0-NEXT: always-inline,
+; GCN-O0-NEXT: amdgpu-export-kernel-runtime-handles,
+; GCN-O0-NEXT: amdgpu-lower-exec-sync,
+; GCN-O0-NEXT: amdgpu-sw-lower-lds,
+; GCN-O0-NEXT: amdgpu-lower-module-lds,
+; GCN-O0-NEXT: function(atomic-expand,
+; GCN-O0-NEXT: verify,
+; GCN-O0-NEXT: unreachableblockelim,
+; GCN-O0-NEXT: ee-instrument<post-inline>,
+; GCN-O0-NEXT: scalarize-masked-mem-intrin,
+; GCN-O0-NEXT: expand-reductions,
+; GCN-O0-NEXT: amdgpu-lower-kernel-arguments),
+; GCN-O0-NEXT: amdgpu-lower-buffer-fat-pointers,
+; GCN-O0-NEXT: amdgpu-lower-intrinsics,
+; GCN-O0-NEXT: cgscc(function(lower-switch,
+; GCN-O0-NEXT: lower-invoke,
+; GCN-O0-NEXT: unreachableblockelim)),
+; GCN-O0-NEXT: require<amdgpu-argument-usage>,
+; GCN-O0-NEXT: cgscc(function(amdgpu-unify-divergent-exit-nodes,
+; GCN-O0-NEXT: fix-irreducible,
+; GCN-O0-NEXT: unify-loop-exits,
+; GCN-O0-NEXT: StructurizeCFGPass,
+; GCN-O0-NEXT: amdgpu-annotate-uniform,
+; GCN-O0-NEXT: si-annotate-control-flow,
+; GCN-O0-NEXT: amdgpu-rewrite-undef-for-phi,
+; GCN-O0-NEXT: lcssa,
+; GCN-O0-NEXT: require<uniformity>,
+; GCN-O0-NEXT: callbr-prepare,
+; GCN-O0-NEXT: safe-stack,
+; GCN-O0-NEXT: stack-protector,
+; GCN-O0-NEXT: verify)),
+; GCN-O0-NEXT: cgscc(function(machine-function(amdgpu-isel,
+; GCN-O0-NEXT: si-fix-sgpr-copies,
+; GCN-O0-NEXT: si-i1-copies,
+; GCN-O0-NEXT: finalize-isel,
+; GCN-O0-NEXT: localstackalloc))),
+; GCN-O0-NEXT: require<reg-usage>,
+; GCN-O0-NEXT: cgscc(function(machine-function(reg-usage-propagation,
+; GCN-O0-NEXT: phi-node-elimination,
+; GCN-O0-NEXT: two-address-instruction,
+; GCN-O0-NEXT: regallocfast,
+; GCN-O0-NEXT: si-fix-vgpr-copies,
+; GCN-O0-NEXT: remove-redundant-debug-values,
+; GCN-O0-NEXT: fixup-statepoint-caller-saved,
+; GCN-O0-NEXT: prolog-epilog,
+; GCN-O0-NEXT: post-ra-pseudos,
+; GCN-O0-NEXT: si-post-ra-bundler,
+; GCN-O0-NEXT: fentry-insert,
+; GCN-O0-NEXT: xray-instrumentation,
+; GCN-O0-NEXT: si-memory-legalizer,
+; GCN-O0-NEXT: si-insert-waitcnts,
+; GCN-O0-NEXT: si-mode-register,
+; GCN-O0-NEXT: si-late-branch-lowering,
+; GCN-O0-NEXT: post-RA-hazard-rec,
+; GCN-O0-NEXT: amdgpu-wait-sgpr-hazards,
+; GCN-O0-NEXT: amdgpu-lower-vgpr-encoding,
+; GCN-O0-NEXT: branch-relaxation))),
+; GCN-O0-NEXT: require<reg-usage>,
+; GCN-O0-NEXT: cgscc(function(machine-function(reg-usage-collector,
+; GCN-O0-NEXT: remove-loads-into-fake-uses,
+; GCN-O0-NEXT: live-debug-values,
+; GCN-O0-NEXT: machine-sanmd,
+; GCN-O0-NEXT: amdgpu-preload-kern-arg-prolog,
+; GCN-O0-NEXT: stack-frame-layout,
+; GCN-O0-NEXT: verify),
+; GCN-O0-NEXT: free-machine-function))
; GCN-O2: require<MachineModuleAnalysis>,
-; GCN-O2: require<profile-summary>,
-; GCN-O2: require<collector-metadata>,
-; GCN-O2: require<runtime-libcall-info>,
-; GCN-O2: pre-isel-intrinsic-lowering,
-; GCN-O2: function(expand-ir-insts<O2>),
-; GCN-O2: amdgpu-remove-incompatible-functions,
-; GCN-O2: amdgpu-printf-runtime-binding,
-; GCN-O2: amdgpu-lower-ctor-dtor,
-; GCN-O2: function(amdgpu-image-intrinsic-opt,
-; GCN-O2: amdgpu-uniform-intrinsic-combine),
-; GCN-O2: expand-variadics,
-; GCN-O2: amdgpu-always-inline,
-; GCN-O2: always-inline,
-; GCN-O2: amdgpu-export-kernel-runtime-handles,
-; GCN-O2: amdgpu-lower-exec-sync,
-; GCN-O2: amdgpu-sw-lower-lds,
-; GCN-O2: amdgpu-lower-module-lds,
-; GCN-O2: function(amdgpu-atomic-optimizer,
-; GCN-O2: atomic-expand,
-; GCN-O2: amdgpu-promote-alloca,
-; GCN-O2: separate-const-offset-from-gep<>,
-; GCN-O2: slsr,
-; GCN-O2: early-cse<>,
-; GCN-O2: nary-reassociate,
-; GCN-O2: early-cse<>,
-; GCN-O2: amdgpu-codegenprepare,
-; GCN-O2: loop-mssa(licm<allowspeculation>),
-; GCN-O2: verify,
-; GCN-O2: loop-mssa(canon-freeze,
-; GCN-O2: loop-reduce),
-; GCN-O2: mergeicmps,
-; GCN-O2: expand-memcmp,
-; GCN-O2: unreachableblockelim,
-; GCN-O2: consthoist,
-; GCN-O2: replace-with-veclib,
-; GCN-O2: partially-inline-libcalls,
-; GCN-O2: ee-instrument<post-inline>,
-; GCN-O2: scalarize-masked-mem-intrin,
-; GCN-O2: expand-reductions,
-; GCN-O2: early-cse<>),
-; GCN-O2: amdgpu-preload-kernel-arguments,
-; GCN-O2: function(amdgpu-lower-kernel-arguments,
-; GCN-O2: codegenprepare,
-; GCN-O2: load-store-vectorizer),
-; GCN-O2: amdgpu-lower-buffer-fat-pointers,
-; GCN-O2: amdgpu-lower-intrinsics,
-; GCN-O2: cgscc(function(lower-switch,
-; GCN-O2: lower-invoke,
-; GCN-O2: unreachableblockelim)),
-; GCN-O2: require<amdgpu-argument-usage>,
-; GCN-O2: cgscc(function(flatten-cfg,
-; GCN-O2: sink,
-; GCN-O2: amdgpu-late-codegenprepare,
-; GCN-O2: amdgpu-unify-divergent-exit-nodes,
-; GCN-O2: fix-irreducible,
-; GCN-O2: unify-loop-exits,
-; GCN-O2: StructurizeCFGPass,
-; GCN-O2: amdgpu-annotate-uniform,
-; GCN-O2: si-annotate-control-flow,
-; GCN-O2: amdgpu-rewrite-undef-for-phi,
-; GCN-O2: lcssa)),
-; GCN-O2: amdgpu-perf-hint,
-; GCN-O2: cgscc(function(require<uniformity>,
-; GCN-O2: objc-arc-contract,
-; GCN-O2: callbr-prepare,
-; GCN-O2: safe-stack,
-; GCN-O2: stack-protector,
-; GCN-O2: verify)),
-; GCN-O2: cgscc(function(machine-function(amdgpu-isel,
-; GCN-O2: si-fix-sgpr-copies,
-; GCN-O2: si-i1-copies,
-; GCN-O2: finalize-isel,
-; GCN-O2: early-tailduplication,
-; GCN-O2: opt-phis,
-; GCN-O2: stack-coloring,
-; GCN-O2: localstackalloc,
-; GCN-O2: dead-mi-elimination,
-; GCN-O2: early-machinelicm,
-; GCN-O2: machine-cse,
-; GCN-O2: machine-sink,
-; GCN-O2: peephole-opt,
-; GCN-O2: dead-mi-elimination,
-; GCN-O2: si-fold-operands,
-; GCN-O2: gcn-dpp-combine,
-; GCN-O2: si-load-store-opt,
-; GCN-O2: si-peephole-sdwa,
-; GCN-O2: early-machinelicm,
-; GCN-O2: machine-cse,
-; GCN-O2: si-fold-operands,
-; GCN-O2: dead-mi-elimination,
-; GCN-O2: si-shrink-instructions))),
-; GCN-O2: require<reg-usage>,
-; GCN-O2: cgscc(function(machine-function(reg-usage-propagation,
-; GCN-O2: amdgpu-prepare-agpr-alloc,
-; GCN-O2: detect-dead-lanes,
-; GCN-O2: dead-mi-elimination,
-; GCN-O2: init-undef,
-; GCN-O2: process-imp-defs,
-; GCN-O2: unreachable-mbb-elimination,
-; GCN-O2: require<live-vars>,
-; GCN-O2: si-opt-vgpr-liverange,
-; GCN-O2: require<machine-loops>,
-; GCN-O2: phi-node-elimination,
-; GCN-O2: si-lower-control-flow,
-; GCN-O2: two-address-instruction,
-; GCN-O2: register-coalescer,
-; GCN-O2: rename-independent-subregs,
-; GCN-O2: amdgpu-rewrite-partial-reg-uses,
-; GCN-O2: machine-scheduler,
-; GCN-O2: amdgpu-pre-ra-optimizations,
-; GCN-O2: si-wqm,
-; GCN-O2: si-optimize-exec-masking-pre-ra,
-; GCN-O2: si-form-memory-clauses,
-; GCN-O2: amdgpu-pre-ra-long-branch-reg,
-; GCN-O2: greedy<sgpr>,
-; GCN-O2: virt-reg-rewriter<no-clear-vregs>,
-; GCN-O2: stack-slot-coloring,
-; GCN-O2: si-lower-sgpr-spills,
-; GCN-O2: si-pre-allocate-wwm-regs,
-; GCN-O2: greedy<wwm>,
-; GCN-O2: si-lower-wwm-copies,
-; GCN-O2: virt-reg-rewriter<no-clear-vregs>,
-; GCN-O2: amdgpu-reserve-wwm-regs,
-; GCN-O2: greedy<vgpr>,
-; GCN-O2: amdgpu-nsa-reassign,
-; GCN-O2: virt-reg-rewriter,
-; GCN-O2: amdgpu-mark-last-scratch-load,
-; GCN-O2: stack-slot-coloring,
-; GCN-O2: machine-cp,
-; GCN-O2: machinelicm,
-; GCN-O2: si-fix-vgpr-copies,
-; GCN-O2: si-optimize-exec-masking,
-; GCN-O2: remove-redundant-debug-values,
-; GCN-O2: fixup-statepoint-caller-saved,
-; GCN-O2: postra-machine-sink,
-; GCN-O2: shrink-wrap,
-; GCN-O2: prolog-epilog,
-; GCN-O2: machine-latecleanup,
-; GCN-O2: branch-folder,
-; GCN-O2: tailduplication,
-; GCN-O2: machine-cp,
-; GCN-O2: post-ra-pseudos,
-; GCN-O2: si-shrink-instructions,
-; GCN-O2: si-post-ra-bundler,
-; GCN-O2: postmisched,
-; GCN-O2: block-placement,
-; GCN-O2: fentry-insert,
-; GCN-O2: xray-instrumentation,
-; GCN-O2: gcn-create-vopd,
-; GCN-O2: si-memory-legalizer,
-; GCN-O2: si-insert-waitcnts,
-; GCN-O2: si-mode-register,
-; GCN-O2: si-insert-hard-clauses,
-; GCN-O2: si-late-branch-lowering,
-; GCN-O2: si-pre-emit-peephole,
-; GCN-O2: post-RA-hazard-rec,
-; GCN-O2: amdgpu-wait-sgpr-hazards,
-; GCN-O2: amdgpu-lower-vgpr-encoding,
-; GCN-O2: amdgpu-insert-delay-alu,
-; GCN-O2: branch-relaxation))),
-; GCN-O2: require<reg-usage>,
-; GCN-O2: cgscc(function(machine-function(reg-usage-collector,
-; GCN-O2: remove-loads-into-fake-uses,
-; GCN-O2: live-debug-values,
-; GCN-O2: machine-sanmd,
-; GCN-O2: amdgpu-preload-kern-arg-prolog,
-; GCN-O2: stack-frame-layout,
-; GCN-O2: verify),
-; GCN-O2: free-machine-function))
+; GCN-O2-NEXT: require<profile-summary>,
+; GCN-O2-NEXT: require<collector-metadata>,
+; GCN-O2-NEXT: require<runtime-libcall-info>,
+; GCN-O2-NEXT: pre-isel-intrinsic-lowering,
+; GCN-O2-NEXT: function(expand-ir-insts<O2>),
+; GCN-O2-NEXT: amdgpu-remove-incompatible-functions,
+; GCN-O2-NEXT: amdgpu-printf-runtime-binding,
+; GCN-O2-NEXT: amdgpu-lower-ctor-dtor,
+; GCN-O2-NEXT: function(amdgpu-image-intrinsic-opt,
+; GCN-O2-NEXT: amdgpu-uniform-intrinsic-combine),
+; GCN-O2-NEXT: expand-variadics,
+; GCN-O2-NEXT: amdgpu-always-inline,
+; GCN-O2-NEXT: always-inline,
+; GCN-O2-NEXT: amdgpu-export-kernel-runtime-handles,
+; GCN-O2-NEXT: amdgpu-lower-exec-sync,
+; GCN-O2-NEXT: amdgpu-sw-lower-lds,
+; GCN-O2-NEXT: amdgpu-lower-module-lds,
+; GCN-O2-NEXT: function(amdgpu-atomic-optimizer,
+; GCN-O2-NEXT: atomic-expand,
+; GCN-O2-NEXT: amdgpu-promote-alloca,
+; GCN-O2-NEXT: separate-const-offset-from-gep<>,
+; GCN-O2-NEXT: slsr,
+; GCN-O2-NEXT: early-cse<>,
+; GCN-O2-NEXT: nary-reassociate,
+; GCN-O2-NEXT: early-cse<>,
+; GCN-O2-NEXT: amdgpu-codegenprepare,
+; GCN-O2-NEXT: loop-mssa(licm<allowspeculation>),
+; GCN-O2-NEXT: verify,
+; GCN-O2-NEXT: loop-mssa(canon-freeze,
+; GCN-O2-NEXT: loop-reduce),
+; GCN-O2-NEXT: mergeicmps,
+; GCN-O2-NEXT: expand-memcmp,
+; GCN-O2-NEXT: unreachableblockelim,
+; GCN-O2-NEXT: consthoist,
+; GCN-O2-NEXT: replace-with-veclib,
+; GCN-O2-NEXT: partially-inline-libcalls,
+; GCN-O2-NEXT: ee-instrument<post-inline>,
+; GCN-O2-NEXT: scalarize-masked-mem-intrin,
+; GCN-O2-NEXT: expand-reductions,
+; GCN-O2-NEXT: early-cse<>),
+; GCN-O2-NEXT: amdgpu-preload-kernel-arguments,
+; GCN-O2-NEXT: function(amdgpu-lower-kernel-arguments,
+; GCN-O2-NEXT: codegenprepare,
+; GCN-O2-NEXT: load-store-vectorizer),
+; GCN-O2-NEXT: amdgpu-lower-buffer-fat-pointers,
+; GCN-O2-NEXT: amdgpu-lower-intrinsics,
+; GCN-O2-NEXT: cgscc(function(lower-switch,
+; GCN-O2-NEXT: lower-invoke,
+; GCN-O2-NEXT: unreachableblockelim)),
+; GCN-O2-NEXT: require<amdgpu-argument-usage>,
+; GCN-O2-NEXT: cgscc(function(flatten-cfg,
+; GCN-O2-NEXT: sink,
+; GCN-O2-NEXT: amdgpu-late-codegenprepare,
+; GCN-O2-NEXT: amdgpu-unify-divergent-exit-nodes,
+; GCN-O2-NEXT: fix-irreducible,
+; GCN-O2-NEXT: unify-loop-exits,
+; GCN-O2-NEXT: StructurizeCFGPass,
+; GCN-O2-NEXT: amdgpu-annotate-uniform,
+; GCN-O2-NEXT: si-annotate-control-flow,
+; GCN-O2-NEXT: amdgpu-rewrite-undef-for-phi,
+; GCN-O2-NEXT: lcssa)),
+; GCN-O2-NEXT: amdgpu-perf-hint,
+; GCN-O2-NEXT: cgscc(function(require<uniformity>,
+; GCN-O2-NEXT: objc-arc-contract,
+; GCN-O2-NEXT: callbr-prepare,
+; GCN-O2-NEXT: safe-stack,
+; GCN-O2-NEXT: stack-protector,
+; GCN-O2-NEXT: verify)),
+; GCN-O2-NEXT: cgscc(function(machine-function(amdgpu-isel,
+; GCN-O2-NEXT: si-fix-sgpr-copies,
+; GCN-O2-NEXT: si-i1-copies,
+; GCN-O2-NEXT: finalize-isel,
+; GCN-O2-NEXT: early-tailduplication,
+; GCN-O2-NEXT: opt-phis,
+; GCN-O2-NEXT: stack-coloring,
+; GCN-O2-NEXT: localstackalloc,
+; GCN-O2-NEXT: dead-mi-elimination,
+; GCN-O2-NEXT: early-machinelicm,
+; GCN-O2-NEXT: machine-cse,
+; GCN-O2-NEXT: machine-sink,
+; GCN-O2-NEXT: peephole-opt,
+; GCN-O2-NEXT: dead-mi-elimination,
+; GCN-O2-NEXT: si-fold-operands,
+; GCN-O2-NEXT: gcn-dpp-combine,
+; GCN-O2-NEXT: si-load-store-opt,
+; GCN-O2-NEXT: si-peephole-sdwa,
+; GCN-O2-NEXT: early-machinelicm,
+; GCN-O2-NEXT: machine-cse,
+; GCN-O2-NEXT: si-fold-operands,
+; GCN-O2-NEXT: dead-mi-elimination,
+; GCN-O2-NEXT: si-shrink-instructions))),
+; GCN-O2-NEXT: require<reg-usage>,
+; GCN-O2-NEXT: cgscc(function(machine-function(reg-usage-propagation,
+; GCN-O2-NEXT: amdgpu-prepare-agpr-alloc,
+; GCN-O2-NEXT: detect-dead-lanes,
+; GCN-O2-NEXT: dead-mi-elimination,
+; GCN-O2-NEXT: init-undef,
+; GCN-O2-NEXT: process-imp-defs,
+; GCN-O2-NEXT: unreachable-mbb-elimination,
+; GCN-O2-NEXT: require<live-vars>,
+; GCN-O2-NEXT: si-opt-vgpr-liverange,
+; GCN-O2-NEXT: require<machine-loops>,
+; GCN-O2-NEXT: phi-node-elimination,
+; GCN-O2-NEXT: si-lower-control-flow,
+; GCN-O2-NEXT: two-address-instruction,
+; GCN-O2-NEXT: register-coalescer,
+; GCN-O2-NEXT: rename-independent-subregs,
+; GCN-O2-NEXT: amdgpu-rewrite-partial-reg-uses,
+; GCN-O2-NEXT: machine-scheduler,
+; GCN-O2-NEXT: amdgpu-pre-ra-optimizations,
+; GCN-O2-NEXT: si-wqm,
+; GCN-O2-NEXT: si-optimize-exec-masking-pre-ra,
+; GCN-O2-NEXT: si-form-memory-clauses,
+; GCN-O2-NEXT: amdgpu-pre-ra-long-branch-reg,
+; GCN-O2-NEXT: greedy<sgpr>,
+; GCN-O2-NEXT: virt-reg-rewriter<no-clear-vregs>,
+; GCN-O2-NEXT: stack-slot-coloring,
+; GCN-O2-NEXT: si-lower-sgpr-spills,
+; GCN-O2-NEXT: si-pre-allocate-wwm-regs,
+; GCN-O2-NEXT: greedy<wwm>,
+; GCN-O2-NEXT: si-lower-wwm-copies,
+; GCN-O2-NEXT: virt-reg-rewriter<no-clear-vregs>,
+; GCN-O2-NEXT: amdgpu-reserve-wwm-regs,
+; GCN-O2-NEXT: greedy<vgpr>,
+; GCN-O2-NEXT: amdgpu-nsa-reassign,
+; GCN-O2-NEXT: virt-reg-rewriter,
+; GCN-O2-NEXT: amdgpu-mark-last-scratch-load,
+; GCN-O2-NEXT: stack-slot-coloring,
+; GCN-O2-NEXT: machine-cp,
+; GCN-O2-NEXT: machinelicm,
+; GCN-O2-NEXT: si-fix-vgpr-copies,
+; GCN-O2-NEXT: si-optimize-exec-masking,
+; GCN-O2-NEXT: remove-redundant-debug-values,
+; GCN-O2-NEXT: fixup-statepoint-caller-saved,
+; GCN-O2-NEXT: postra-machine-sink,
+; GCN-O2-NEXT: shrink-wrap,
+; GCN-O2-NEXT: prolog-epilog,
+; GCN-O2-NEXT: machine-latecleanup,
+; GCN-O2-NEXT: branch-folder,
+; GCN-O2-NEXT: tailduplication,
+; GCN-O2-NEXT: machine-cp,
+; GCN-O2-NEXT: post-ra-pseudos,
+; GCN-O2-NEXT: si-shrink-instructions,
+; GCN-O2-NEXT: si-post-ra-bundler,
+; GCN-O2-NEXT: postmisched,
+; GCN-O2-NEXT: block-placement,
+; GCN-O2-NEXT: fentry-insert,
+; GCN-O2-NEXT: xray-instrumentation,
+; GCN-O2-NEXT: gcn-create-vopd,
+; GCN-O2-NEXT: si-memory-legalizer,
+; GCN-O2-NEXT: si-insert-waitcnts,
+; GCN-O2-NEXT: si-mode-register,
+; GCN-O2-NEXT: si-insert-hard-clauses,
+; GCN-O2-NEXT: si-late-branch-lowering,
+; GCN-O2-NEXT: si-pre-emit-peephole,
+; GCN-O2-NEXT: post-RA-hazard-rec,
+; GCN-O2-NEXT: amdgpu-wait-sgpr-hazards,
+; GCN-O2-NEXT: amdgpu-lower-vgpr-encoding,
+; GCN-O2-NEXT: amdgpu-insert-delay-alu,
+; GCN-O2-NEXT: branch-relaxation))),
+; GCN-O2-NEXT: require<reg-usage>,
+; GCN-O2-NEXT: cgscc(function(machine-function(reg-usage-collector,
+; GCN-O2-NEXT: remove-loads-into-fake-uses,
+; GCN-O2-NEXT: live-debug-values,
+; GCN-O2-NEXT: machine-sanmd,
+; GCN-O2-NEXT: amdgpu-preload-kern-arg-prolog,
+; GCN-O2-NEXT: stack-frame-layout,
+; GCN-O2-NEXT: verify),
+; GCN-O2-NEXT: free-machine-function))
; GCN-O3: require<MachineModuleAnalysis>,
-; GCN-O3: require<profile-summary>,
-; GCN-O3: require<collector-metadata>,
-; GCN-O3: require<runtime-libcall-info>,
-; GCN-O3: pre-isel-intrinsic-lowering,
-; GCN-O3: function(expand-ir-insts<O3>),
-; GCN-O3: amdgpu-remove-incompatible-functions,
-; GCN-O3: amdgpu-printf-runtime-binding,
-; GCN-O3: amdgpu-lower-ctor-dtor,
-; GCN-O3: function(amdgpu-image-intrinsic-opt,
-; GCN-O3: amdgpu-uniform-intrinsic-combine),
-; GCN-O3: expand-variadics,
-; GCN-O3: amdgpu-always-inline,
-; GCN-O3: always-inline,
-; GCN-O3: amdgpu-export-kernel-runtime-handles,
-; GCN-O3: amdgpu-lower-exec-sync,
-; GCN-O3: amdgpu-sw-lower-lds,
-; GCN-O3: amdgpu-lower-module-lds,
-; GCN-O3: function(amdgpu-atomic-optimizer,
-; GCN-O3: atomic-expand,
-; GCN-O3: amdgpu-promote-alloca,
-; GCN-O3: separate-const-offset-from-gep<>,
-; GCN-O3: slsr,
-; GCN-O3: gvn<>,
-; GCN-O3: nary-reassociate,
-; GCN-O3: early-cse<>,
-; GCN-O3: amdgpu-codegenprepare,
-; GCN-O3: loop-mssa(licm<allowspeculation>),
-; GCN-O3: verify,
-; GCN-O3: loop-mssa(canon-freeze,
-; GCN-O3: loop-reduce),
-; GCN-O3: mergeicmps,
-; GCN-O3: expand-memcmp,
-; GCN-O3: unreachableblockelim,
-; GCN-O3: consthoist,
-; GCN-O3: replace-with-veclib,
-; GCN-O3: partially-inline-libcalls,
-; GCN-O3: ee-instrument<post-inline>,
-; GCN-O3: scalarize-masked-mem-intrin,
-; GCN-O3: expand-reductions,
-; GCN-O3: gvn<>),
-; GCN-O3: amdgpu-preload-kernel-arguments,
-; GCN-O3: function(amdgpu-lower-kernel-arguments,
-; GCN-O3: codegenprepare,
-; GCN-O3: load-store-vectorizer),
-; GCN-O3: amdgpu-lower-buffer-fat-pointers,
-; GCN-O3: amdgpu-lower-intrinsics,
-; GCN-O3: cgscc(function(lower-switch,
-; GCN-O3: lower-invoke,
-; GCN-O3: unreachableblockelim)),
-; GCN-O3: require<amdgpu-argument-usage>,
-; GCN-O3: cgscc(function(flatten-cfg,
-; GCN-O3: sink,
-; GCN-O3: amdgpu-late-codegenprepare,
-; GCN-O3: amdgpu-unify-divergent-exit-nodes,
-; GCN-O3: fix-irreducible,
-; GCN-O3: unify-loop-exits,
-; GCN-O3: StructurizeCFGPass,
-; GCN-O3: amdgpu-annotate-uniform,
-; GCN-O3: si-annotate-control-flow,
-; GCN-O3: amdgpu-rewrite-undef-for-phi,
-; GCN-O3: lcssa)),
-; GCN-O3: amdgpu-perf-hint,
-; GCN-O3: cgscc(function(require<uniformity>,
-; GCN-O3: objc-arc-contract,
-; GCN-O3: callbr-prepare,
-; GCN-O3: safe-stack,
-; GCN-O3: stack-protector,
-; GCN-O3: verify)),
-; GCN-O3: cgscc(function(machine-function(amdgpu-isel,
-; GCN-O3: si-fix-sgpr-copies,
-; GCN-O3: si-i1-copies,
-; GCN-O3: finalize-isel,
-; GCN-O3: early-tailduplication,
-; GCN-O3: opt-phis,
-; GCN-O3: stack-coloring,
-; GCN-O3: localstackalloc,
-; GCN-O3: dead-mi-elimination,
-; GCN-O3: early-machinelicm,
-; GCN-O3: machine-cse,
-; GCN-O3: machine-sink,
-; GCN-O3: peephole-opt,
-; GCN-O3: dead-mi-elimination,
-; GCN-O3: si-fold-operands,
-; GCN-O3: gcn-dpp-combine,
-; GCN-O3: si-load-store-opt,
-; GCN-O3: si-peephole-sdwa,
-; GCN-O3: early-machinelicm,
-; GCN-O3: machine-cse,
-; GCN-O3: si-fold-operands,
-; GCN-O3: dead-mi-elimination,
-; GCN-O3: si-shrink-instructions))),
-; GCN-O3: require<reg-usage>,
-; GCN-O3: cgscc(function(machine-function(reg-usage-propagation,
-; GCN-O3: amdgpu-prepare-agpr-alloc,
-; GCN-O3: detect-dead-lanes,
-; GCN-O3: dead-mi-elimination,
-; GCN-O3: init-undef,
-; GCN-O3: process-imp-defs,
-; GCN-O3: unreachable-mbb-elimination,
-; GCN-O3: require<live-vars>,
-; GCN-O3: si-opt-vgpr-liverange,
-; GCN-O3: require<machine-loops>,
-; GCN-O3: phi-node-elimination,
-; GCN-O3: si-lower-control-flow,
-; GCN-O3: two-address-instruction,
-; GCN-O3: register-coalescer,
-; GCN-O3: rename-independent-subregs,
-; GCN-O3: amdgpu-rewrite-partial-reg-uses,
-; GCN-O3: machine-scheduler,
-; GCN-O3: amdgpu-pre-ra-optimizations,
-; GCN-O3: si-wqm,
-; GCN-O3: si-optimize-exec-masking-pre-ra,
-; GCN-O3: si-form-memory-clauses,
-; GCN-O3: amdgpu-pre-ra-long-branch-reg,
-; GCN-O3: greedy<sgpr>,
-; GCN-O3: virt-reg-rewriter<no-clear-vregs>,
-; GCN-O3: stack-slot-coloring,
-; GCN-O3: si-lower-sgpr-spills,
-; GCN-O3: si-pre-allocate-wwm-regs,
-; GCN-O3: greedy<wwm>,
-; GCN-O3: si-lower-wwm-copies,
-; GCN-O3: virt-reg-rewriter<no-clear-vregs>,
-; GCN-O3: amdgpu-reserve-wwm-regs,
-; GCN-O3: greedy<vgpr>,
-; GCN-O3: amdgpu-nsa-reassign,
-; GCN-O3: virt-reg-rewriter,
-; GCN-O3: amdgpu-mark-last-scratch-load,
-; GCN-O3: stack-slot-coloring,
-; GCN-O3: machine-cp,
-; GCN-O3: machinelicm,
-; GCN-O3: si-fix-vgpr-copies,
-; GCN-O3: si-optimize-exec-masking,
-; GCN-O3: remove-redundant-debug-values,
-; GCN-O3: fixup-statepoint-caller-saved,
-; GCN-O3: postra-machine-sink,
-; GCN-O3: shrink-wrap,
-; GCN-O3: prolog-epilog,
-; GCN-O3: machine-latecleanup,
-; GCN-O3: branch-folder,
-; GCN-O3: tailduplication,
-; GCN-O3: machine-cp,
-; GCN-O3: post-ra-pseudos,
-; GCN-O3: si-shrink-instructions,
-; GCN-O3: si-post-ra-bundler,
-; GCN-O3: postmisched,
-; GCN-O3: block-placement,
-; GCN-O3: fentry-insert,
-; GCN-O3: xray-instrumentation,
-; GCN-O3: gcn-create-vopd,
-; GCN-O3: si-memory-legalizer,
-; GCN-O3: si-insert-waitcnts,
-; GCN-O3: si-mode-register,
-; GCN-O3: si-insert-hard-clauses,
-; GCN-O3: si-late-branch-lowering,
-; GCN-O3: si-pre-emit-peephole,
-; GCN-O3: post-RA-hazard-rec,
-; GCN-O3: amdgpu-wait-sgpr-hazards,
-; GCN-O3: amdgpu-lower-vgpr-encoding,
-; GCN-O3: amdgpu-insert-delay-alu,
-; GCN-O3: branch-relaxation))),
-; GCN-O3: require<reg-usage>,
-; GCN-O3: cgscc(function(machine-function(reg-usage-collector,
-; GCN-O3: remove-loads-into-fake-uses,
-; GCN-O3: live-debug-values,
-; GCN-O3: machine-sanmd,
-; GCN-O3: amdgpu-preload-kern-arg-prolog,
-; GCN-O3: stack-frame-layout,
-; GCN-O3: verify),
-; GCN-O3: free-machine-function))
+; GCN-O3-NEXT: require<profile-summary>,
+; GCN-O3-NEXT: require<collector-metadata>,
+; GCN-O3-NEXT: require<runtime-libcall-info>,
+; GCN-O3-NEXT: pre-isel-intrinsic-lowering,
+; GCN-O3-NEXT: function(expand-ir-insts<O3>),
+; GCN-O3-NEXT: amdgpu-remove-incompatible-functions,
+; GCN-O3-NEXT: amdgpu-printf-runtime-binding,
+; GCN-O3-NEXT: amdgpu-lower-ctor-dtor,
+; GCN-O3-NEXT: function(amdgpu-image-intrinsic-opt,
+; GCN-O3-NEXT: amdgpu-uniform-intrinsic-combine),
+; GCN-O3-NEXT: expand-variadics,
+; GCN-O3-NEXT: amdgpu-always-inline,
+; GCN-O3-NEXT: always-inline,
+; GCN-O3-NEXT: amdgpu-export-kernel-runtime-handles,
+; GCN-O3-NEXT: amdgpu-lower-exec-sync,
+; GCN-O3-NEXT: amdgpu-sw-lower-lds,
+; GCN-O3-NEXT: amdgpu-lower-module-lds,
+; GCN-O3-NEXT: function(amdgpu-atomic-optimizer,
+; GCN-O3-NEXT: atomic-expand,
+; GCN-O3-NEXT: amdgpu-promote-alloca,
+; GCN-O3-NEXT: separate-const-offset-from-gep<>,
+; GCN-O3-NEXT: slsr,
+; GCN-O3-NEXT: gvn<>,
+; GCN-O3-NEXT: nary-reassociate,
+; GCN-O3-NEXT: early-cse<>,
+; GCN-O3-NEXT: amdgpu-codegenprepare,
+; GCN-O3-NEXT: loop-mssa(licm<allowspeculation>),
+; GCN-O3-NEXT: verify,
+; GCN-O3-NEXT: loop-mssa(canon-freeze,
+; GCN-O3-NEXT: loop-reduce),
+; GCN-O3-NEXT: mergeicmps,
+; GCN-O3-NEXT: expand-memcmp,
+; GCN-O3-NEXT: unreachableblockelim,
+; GCN-O3-NEXT: consthoist,
+; GCN-O3-NEXT: replace-with-veclib,
+; GCN-O3-NEXT: partially-inline-libcalls,
+; GCN-O3-NEXT: ee-instrument<post-inline>,
+; GCN-O3-NEXT: scalarize-masked-mem-intrin,
+; GCN-O3-NEXT: expand-reductions,
+; GCN-O3-NEXT: gvn<>),
+; GCN-O3-NEXT: amdgpu-preload-kernel-arguments,
+; GCN-O3-NEXT: function(amdgpu-lower-kernel-arguments,
+; GCN-O3-NEXT: codegenprepare,
+; GCN-O3-NEXT: load-store-vectorizer),
+; GCN-O3-NEXT: amdgpu-lower-buffer-fat-pointers,
+; GCN-O3-NEXT: amdgpu-lower-intrinsics,
+; GCN-O3-NEXT: cgscc(function(lower-switch,
+; GCN-O3-NEXT: lower-invoke,
+; GCN-O3-NEXT: unreachableblockelim)),
+; GCN-O3-NEXT: require<amdgpu-argument-usage>,
+; GCN-O3-NEXT: cgscc(function(flatten-cfg,
+; GCN-O3-NEXT: sink,
+; GCN-O3-NEXT: amdgpu-late-codegenprepare,
+; GCN-O3-NEXT: amdgpu-unify-divergent-exit-nodes,
+; GCN-O3-NEXT: fix-irreducible,
+; GCN-O3-NEXT: unify-loop-exits,
+; GCN-O3-NEXT: StructurizeCFGPass,
+; GCN-O3-NEXT: amdgpu-annotate-uniform,
+; GCN-O3-NEXT: si-annotate-control-flow,
+; GCN-O3-NEXT: amdgpu-rewrite-undef-for-phi,
+; GCN-O3-NEXT: lcssa)),
+; GCN-O3-NEXT: amdgpu-perf-hint,
+; GCN-O3-NEXT: cgscc(function(require<uniformity>,
+; GCN-O3-NEXT: objc-arc-contract,
+; GCN-O3-NEXT: callbr-prepare,
+; GCN-O3-NEXT: safe-stack,
+; GCN-O3-NEXT: stack-protector,
+; GCN-O3-NEXT: verify)),
+; GCN-O3-NEXT: cgscc(function(machine-function(amdgpu-isel,
+; GCN-O3-NEXT: si-fix-sgpr-copies,
+; GCN-O3-NEXT: si-i1-copies,
+; GCN-O3-NEXT: finalize-isel,
+; GCN-O3-NEXT: early-tailduplication,
+; GCN-O3-NEXT: opt-phis,
+; GCN-O3-NEXT: stack-coloring,
+; GCN-O3-NEXT: localstackalloc,
+; GCN-O3-NEXT: dead-mi-elimination,
+; GCN-O3-NEXT: early-machinelicm,
+; GCN-O3-NEXT: machine-cse,
+; GCN-O3-NEXT: machine-sink,
+; GCN-O3-NEXT: peephole-opt,
+; GCN-O3-NEXT: dead-mi-elimination,
+; GCN-O3-NEXT: si-fold-operands,
+; GCN-O3-NEXT: gcn-dpp-combine,
+; GCN-O3-NEXT: si-load-store-opt,
+; GCN-O3-NEXT: si-peephole-sdwa,
+; GCN-O3-NEXT: early-machinelicm,
+; GCN-O3-NEXT: machine-cse,
+; GCN-O3-NEXT: si-fold-operands,
+; GCN-O3-NEXT: dead-mi-elimination,
+; GCN-O3-NEXT: si-shrink-instructions))),
+; GCN-O3-NEXT: require<reg-usage>,
+; GCN-O3-NEXT: cgscc(function(machine-function(reg-usage-propagation,
+; GCN-O3-NEXT: amdgpu-prepare-agpr-alloc,
+; GCN-O3-NEXT: detect-dead-lanes,
+; GCN-O3-NEXT: dead-mi-elimination,
+; GCN-O3-NEXT: init-undef,
+; GCN-O3-NEXT: process-imp-defs,
+; GCN-O3-NEXT: unreachable-mbb-elimination,
+; GCN-O3-NEXT: require<live-vars>,
+; GCN-O3-NEXT: si-opt-vgpr-liverange,
+; GCN-O3-NEXT: require<machine-loops>,
+; GCN-O3-NEXT: phi-node-elimination,
+; GCN-O3-NEXT: si-lower-control-flow,
+; GCN-O3-NEXT: two-address-instruction,
+; GCN-O3-NEXT: register-coalescer,
+; GCN-O3-NEXT: rename-independent-subregs,
+; GCN-O3-NEXT: amdgpu-rewrite-partial-reg-uses,
+; GCN-O3-NEXT: machine-scheduler,
+; GCN-O3-NEXT: amdgpu-pre-ra-optimizations,
+; GCN-O3-NEXT: si-wqm,
+; GCN-O3-NEXT: si-optimize-exec-masking-pre-ra,
+; GCN-O3-NEXT: si-form-memory-clauses,
+; GCN-O3-NEXT: amdgpu-pre-ra-long-branch-reg,
+; GCN-O3-NEXT: greedy<sgpr>,
+; GCN-O3-NEXT: virt-reg-rewriter<no-clear-vregs>,
+; GCN-O3-NEXT: stack-slot-coloring,
+; GCN-O3-NEXT: si-lower-sgpr-spills,
+; GCN-O3-NEXT: si-pre-allocate-wwm-regs,
+; GCN-O3-NEXT: greedy<wwm>,
+; GCN-O3-NEXT: si-lower-wwm-copies,
+; GCN-O3-NEXT: virt-reg-rewriter<no-clear-vregs>,
+; GCN-O3-NEXT: amdgpu-reserve-wwm-regs,
+; GCN-O3-NEXT: greedy<vgpr>,
+; GCN-O3-NEXT: amdgpu-nsa-reassign,
+; GCN-O3-NEXT: virt-reg-rewriter,
+; GCN-O3-NEXT: amdgpu-mark-last-scratch-load,
+; GCN-O3-NEXT: stack-slot-coloring,
+; GCN-O3-NEXT: machine-cp,
+; GCN-O3-NEXT: machinelicm,
+; GCN-O3-NEXT: si-fix-vgpr-copies,
+; GCN-O3-NEXT: si-optimize-exec-masking,
+; GCN-O3-NEXT: remove-redundant-debug-values,
+; GCN-O3-NEXT: fixup-statepoint-caller-saved,
+; GCN-O3-NEXT: postra-machine-sink,
+; GCN-O3-NEXT: shrink-wrap,
+; GCN-O3-NEXT: prolog-epilog,
+; GCN-O3-NEXT: machine-latecleanup,
+; GCN-O3-NEXT: branch-folder,
+; GCN-O3-NEXT: tailduplication,
+; GCN-O3-NEXT: machine-cp,
+; GCN-O3-NEXT: post-ra-pseudos,
+; GCN-O3-NEXT: si-shrink-instructions,
+; GCN-O3-NEXT: si-post-ra-bundler,
+; GCN-O3-NEXT: postmisched,
+; GCN-O3-NEXT: block-placement,
+; GCN-O3-NEXT: fentry-insert,
+; GCN-O3-NEXT: xray-instrumentation,
+; GCN-O3-NEXT: gcn-create-vopd,
+; GCN-O3-NEXT: si-memory-legalizer,
+; GCN-O3-NEXT: si-insert-waitcnts,
+; GCN-O3-NEXT: si-mode-register,
+; GCN-O3-NEXT: si-insert-hard-clauses,
+; GCN-O3-NEXT: si-late-branch-lowering,
+; GCN-O3-NEXT: si-pre-emit-peephole,
+; GCN-O3-NEXT: post-RA-hazard-rec,
+; GCN-O3-NEXT: amdgpu-wait-sgpr-hazards,
+; GCN-O3-NEXT: amdgpu-lower-vgpr-encoding,
+; GCN-O3-NEXT: amdgpu-insert-delay-alu,
+; GCN-O3-NEXT: branch-relaxation))),
+; GCN-O3-NEXT: require<reg-usage>,
+; GCN-O3-NEXT: cgscc(function(machine-function(reg-usage-collector,
+; GCN-O3-NEXT: remove-loads-into-fake-uses,
+; GCN-O3-NEXT: live-debug-values,
+; GCN-O3-NEXT: machine-sanmd,
+; GCN-O3-NEXT: amdgpu-preload-kern-arg-prolog,
+; GCN-O3-NEXT: stack-frame-layout,
+; GCN-O3-NEXT: verify),
+; GCN-O3-NEXT: free-machine-function))
define void @empty() {
ret void
More information about the llvm-commits
mailing list